2019-10-19 08:40:59

by Sami Tolvanen

Subject: [PATCH 00/18] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack (SCS)
mitigation, which uses a separately allocated shadow stack to protect
against return address overwrites. More information can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS is currently supported only on arm64, where the compiler requires
the x18 register to be reserved for holding the current task's shadow
stack pointer. Because of this, the series includes four patches from
Ard to remove x18 usage from assembly code and to reserve the register
from general allocation.

With -fsanitize=shadow-call-stack, the compiler injects instructions
to all non-leaf C functions to store the return address to the shadow
stack and unconditionally load it again before returning. As a result,
SCS is incompatible with features that rely on modifying function
return addresses to alter control flow, such as function graph tracing
and kretprobes. A copy of the return address is still kept in the
kernel stack for compatibility with stack unwinding, for example.
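
As a rough illustration (not taken from the patches themselves), the
per-function instrumentation amounts to the following, assuming x18
holds the current task's shadow stack pointer:

/* Sketch only: a plain non-leaf function... */
extern int bar(int x);

int foo(int x)
{
	return bar(x) + 1;
}

/*
 * ...for which Clang emits, in addition to the usual code, roughly:
 *
 *	str	x30, [x18], #8		// prologue: push the return address
 *	...				// normal function body
 *	ldr	x30, [x18, #-8]!	// epilogue: reload it before ret
 *
 * so corrupting the copy of the return address kept on the regular
 * stack no longer changes where foo() returns to.
 */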

SCS has a minimal performance overhead, but allocating shadow stacks
increases kernel memory usage. The feature is therefore mostly useful
on hardware that lacks support for PAC instructions. This series adds
a ROP protection choice to the kernel configuration, where other
return address protection options can be selected as they are added to
the kernel.


Ard Biesheuvel (4):
arm64/lib: copy_page: avoid x18 register in assembler code
arm64: kvm: stop treating register x18 as caller save
arm64: kernel: avoid x18 as an arbitrary temp register
arm64: kbuild: reserve reg x18 from general allocation by the compiler

Sami Tolvanen (14):
arm64: mm: don't use x18 in idmap_kpti_install_ng_mappings
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
trace: disable function graph tracing with SCS
kprobes: fix compilation without CONFIG_KRETPROBES
kprobes: disable kretprobes with SCS
arm64: reserve x18 only with Shadow Call Stack
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: kprobes: fix kprobes without CONFIG_KRETPROBES
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack

Makefile | 6 +
arch/Kconfig | 41 ++++-
arch/arm64/Kconfig | 1 +
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/scs.h | 60 ++++++++
arch/arm64/include/asm/stacktrace.h | 4 +
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/cpu-reset.S | 4 +-
arch/arm64/kernel/efi-rt-wrapper.S | 7 +-
arch/arm64/kernel/entry.S | 23 +++
arch/arm64/kernel/head.S | 9 ++
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/probes/kprobes.c | 2 +
arch/arm64/kernel/process.c | 3 +
arch/arm64/kernel/scs.c | 39 +++++
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/kvm/hyp/Makefile | 3 +-
arch/arm64/kvm/hyp/entry.S | 12 +-
arch/arm64/lib/copy_page.S | 38 ++---
arch/arm64/mm/proc.S | 69 +++++----
drivers/base/node.c | 6 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 2 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 88 +++++++++++
init/init_task.c | 6 +
init/main.c | 3 +
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/kprobes.c | 38 ++---
kernel/sched/core.c | 2 +
kernel/sched/sched.h | 1 +
kernel/scs.c | 221 +++++++++++++++++++++++++++
kernel/trace/Kconfig | 1 +
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
40 files changed, 656 insertions(+), 82 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

--
2.23.0.866.gb869b98d4c-goog


2019-10-19 08:41:03

by Sami Tolvanen

Subject: [PATCH 02/18] arm64/lib: copy_page: avoid x18 register in assembler code

From: Ard Biesheuvel <[email protected]>

Register x18 will no longer be used as a caller save register in the
future, so stop using it in the copy_page() code.

Link: https://patchwork.kernel.org/patch/9836869/
Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/lib/copy_page.S | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index bbb8562396af..8b562264c165 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -34,45 +34,45 @@ alternative_else_nop_endif
ldp x14, x15, [x1, #96]
ldp x16, x17, [x1, #112]

- mov x18, #(PAGE_SIZE - 128)
+ add x0, x0, #256
add x1, x1, #128
1:
- subs x18, x18, #128
+ tst x0, #(PAGE_SIZE - 1)

alternative_if ARM64_HAS_NO_HW_PREFETCH
prfm pldl1strm, [x1, #384]
alternative_else_nop_endif

- stnp x2, x3, [x0]
+ stnp x2, x3, [x0, #-256]
ldp x2, x3, [x1]
- stnp x4, x5, [x0, #16]
+ stnp x4, x5, [x0, #-240]
ldp x4, x5, [x1, #16]
- stnp x6, x7, [x0, #32]
+ stnp x6, x7, [x0, #-224]
ldp x6, x7, [x1, #32]
- stnp x8, x9, [x0, #48]
+ stnp x8, x9, [x0, #-208]
ldp x8, x9, [x1, #48]
- stnp x10, x11, [x0, #64]
+ stnp x10, x11, [x0, #-192]
ldp x10, x11, [x1, #64]
- stnp x12, x13, [x0, #80]
+ stnp x12, x13, [x0, #-176]
ldp x12, x13, [x1, #80]
- stnp x14, x15, [x0, #96]
+ stnp x14, x15, [x0, #-160]
ldp x14, x15, [x1, #96]
- stnp x16, x17, [x0, #112]
+ stnp x16, x17, [x0, #-144]
ldp x16, x17, [x1, #112]

add x0, x0, #128
add x1, x1, #128

- b.gt 1b
+ b.ne 1b

- stnp x2, x3, [x0]
- stnp x4, x5, [x0, #16]
- stnp x6, x7, [x0, #32]
- stnp x8, x9, [x0, #48]
- stnp x10, x11, [x0, #64]
- stnp x12, x13, [x0, #80]
- stnp x14, x15, [x0, #96]
- stnp x16, x17, [x0, #112]
+ stnp x2, x3, [x0, #-256]
+ stnp x4, x5, [x0, #-240]
+ stnp x6, x7, [x0, #-224]
+ stnp x8, x9, [x0, #-208]
+ stnp x10, x11, [x0, #-192]
+ stnp x12, x13, [x0, #-176]
+ stnp x14, x15, [x0, #-160]
+ stnp x16, x17, [x0, #-144]

ret
ENDPROC(copy_page)
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:05

by Sami Tolvanen

Subject: [PATCH 04/18] arm64: kernel: avoid x18 as an arbitrary temp register

From: Ard Biesheuvel <[email protected]>

The code in __cpu_soft_restart() uses x18 as an arbitrary temp register,
which will shortly be disallowed. So use x8 instead.

Link: https://patchwork.kernel.org/patch/9836877/
Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kernel/cpu-reset.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6ea337d464c4..32c7bf858dd9 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart)
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return

-1: mov x18, x1 // entry
+1: mov x8, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
- br x18
+ br x8
ENDPROC(__cpu_soft_restart)

.popsection
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:12

by Sami Tolvanen

Subject: [PATCH 05/18] arm64: kbuild: reserve reg x18 from general allocation by the compiler

From: Ard Biesheuvel <[email protected]>

Before we can start using register x18 for a special purpose (as permitted
by the AAPCS64 ABI), we need to tell the compiler that it is off limits
for general allocation. So tag it as 'fixed', and remove the mention from
the LL/SC compiler flag override.

Link: https://patchwork.kernel.org/patch/9836881/
Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2c0238ce0551..1c7b276bc7c5 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -55,7 +55,7 @@ endif

KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \
$(compat_vdso) $(cc_has_k_constraint)
-KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -ffixed-x18
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso)

--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:16

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH 11/18] kprobes: disable kretprobes with SCS

With CONFIG_KRETPROBES, function return addresses are modified to
redirect control flow to kretprobe_trampoline. This is incompatible with
return address protection.
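
For reference, the return address rewrite this refers to looks roughly
like the sketch below, based on the existing arm64 code (nothing this
patch changes):

#include <linux/kprobes.h>

/* Sketch of arch_prepare_kretprobe() in arch/arm64/kernel/probes/kprobes.c */
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
				      struct pt_regs *regs)
{
	/* remember the real return address... */
	ri->ret_addr = (kprobe_opcode_t *)regs->regs[30];

	/* ...and replace x30/lr with the trampoline */
	regs->regs[30] = (long)&kretprobe_trampoline;
}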

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index a222adda8130..4646e3b34925 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -171,7 +171,7 @@ config ARCH_USE_BUILTIN_BSWAP

config KRETPROBES
def_bool y
- depends on KPROBES && HAVE_KRETPROBES
+ depends on KPROBES && HAVE_KRETPROBES && ROP_PROTECTION_NONE

config USER_RETURN_NOTIFIER
bool
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:28

by Sami Tolvanen

Subject: [PATCH 13/18] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/mm/proc.S | 6 ++++++
1 file changed, 6 insertions(+)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index fdabf40a83c8..9a8bd4bc8549 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -73,6 +73,9 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+#ifdef CONFIG_SHADOW_CALL_STACK
+ stp x18, xzr, [x0, #96]
+#endif
ret
ENDPROC(cpu_do_suspend)

@@ -89,6 +92,9 @@ ENTRY(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldp x18, x19, [x0, #96]
+#endif
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:40

by Sami Tolvanen

Subject: [PATCH 07/18] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 296546ffed6c..111e58ec231e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index ac9247371871..df352e4bab90 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bda20282746b..fcb8c1708f9e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 47324e8d313b..0e3cba49ea1a 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -7,9 +7,11 @@

#include <linux/cpuhotplug.h>
#include <linux/mm.h>
+#include <linux/mmzone.h>
#include <linux/slab.h>
#include <linux/scs.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

#define SCS_END_MAGIC 0xaf0194819b1635f6UL
@@ -59,6 +61,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -92,6 +99,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -128,6 +140,12 @@ void scs_set_init_magic(struct task_struct *tsk)
scs_load(tsk);
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -138,6 +156,7 @@ int scs_prepare(struct task_struct *tsk, int node)

task_set_scs(tsk, s);
scs_set_magic(tsk);
+ scs_account(tsk, 1);

return 0;
}
@@ -157,6 +176,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
scs_task_init(tsk);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ecc3dbad606b..fe17d69d98a7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5361,6 +5361,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5382,6 +5385,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6afc892a148a..9fe4afe670fe 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1118,6 +1118,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:43

by Sami Tolvanen

Subject: [PATCH 08/18] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 0e3cba49ea1a..1ec5c5a8dfae 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -161,6 +161,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(tsk);
+ uintptr_t s = (uintptr_t)p;
+
+ while (p < end && *p)
+ p++;
+
+ return (uintptr_t)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s: highest shadow stack usage %lu bytes\n",
+ __func__, used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
return *scs_magic(tsk) != SCS_END_MAGIC;
@@ -175,6 +213,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
scs_task_init(tsk);
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:44

by Sami Tolvanen

Subject: [PATCH 14/18] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18 and SCS is enabled, restore the register
before jumping back to instrumented code.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..945744f16086 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,10 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Restore x18 before returning to instrumented code. */
+ mov x18, x2
+#endif
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:46

by Sami Tolvanen

Subject: [PATCH 10/18] kprobes: fix compilation without CONFIG_KRETPROBES

kprobe_on_func_entry and arch_kprobe_on_func_entry need to be available
even if CONFIG_KRETPROBES is not selected.

Signed-off-by: Sami Tolvanen <[email protected]>
---
kernel/kprobes.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 53534aa258a6..b5e20a4669b8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1829,6 +1829,25 @@ unsigned long __weak arch_deref_entry_point(void *entry)
return (unsigned long)entry;
}

+bool __weak arch_kprobe_on_func_entry(unsigned long offset)
+{
+ return !offset;
+}
+
+bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
+{
+ kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
+
+ if (IS_ERR(kp_addr))
+ return false;
+
+ if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
+ !arch_kprobe_on_func_entry(offset))
+ return false;
+
+ return true;
+}
+
#ifdef CONFIG_KRETPROBES
/*
* This kprobe pre_handler is registered with every kretprobe. When probe
@@ -1885,25 +1904,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);

-bool __weak arch_kprobe_on_func_entry(unsigned long offset)
-{
- return !offset;
-}
-
-bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
-{
- kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
-
- if (IS_ERR(kp_addr))
- return false;
-
- if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
- !arch_kprobe_on_func_entry(offset))
- return false;
-
- return true;
-}
-
int register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:41:51

by Sami Tolvanen

Subject: [PATCH 12/18] arm64: reserve x18 only with Shadow Call Stack

Only reserve x18 with CONFIG_SHADOW_CALL_STACK. Note that all external
kernel modules must also have x18 reserved if the kernel uses SCS.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/Makefile | 6 +++++-
1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 1c7b276bc7c5..ef76101201b2 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -55,7 +55,7 @@ endif

KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \
$(compat_vdso) $(cc_has_k_constraint)
-KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -ffixed-x18
+KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso)

@@ -72,6 +72,10 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:42:03

by Sami Tolvanen

Subject: [PATCH 16/18] arm64: kprobes: fix kprobes without CONFIG_KRETPROBES

This allows CONFIG_KRETPROBES to be disabled without disabling
kprobes entirely.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kernel/probes/kprobes.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c4452827419b..98230ae979ca 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -551,6 +551,7 @@ void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
return (void *)orig_ret_address;
}

+#ifdef CONFIG_KRETPROBES
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -564,6 +565,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
{
return 0;
}
+#endif

int __init arch_init_kprobes(void)
{
--
2.23.0.866.gb869b98d4c-goog

2019-10-19 08:49:32

by Nick Desaulniers

Subject: Re: [PATCH 13/18] arm64: preserve x18 when CPU is suspended

On Fri, Oct 18, 2019 at 9:11 AM Sami Tolvanen <[email protected]> wrote:
>
> Don't lose the current task's shadow stack when the CPU is suspended.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/mm/proc.S | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fdabf40a83c8..9a8bd4bc8549 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -73,6 +73,9 @@ alternative_endif
> stp x8, x9, [x0, #48]
> stp x10, x11, [x0, #64]
> stp x12, x13, [x0, #80]
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + stp x18, xzr, [x0, #96]

Could this be a str/ldr of just x18 rather than stp/ldp of x18 +
garbage? Maybe there's no real cost difference, or some kind of
alignment invariant?

> +#endif
> ret
> ENDPROC(cpu_do_suspend)
>
> @@ -89,6 +92,9 @@ ENTRY(cpu_do_resume)
> ldp x9, x10, [x0, #48]
> ldp x11, x12, [x0, #64]
> ldp x13, x14, [x0, #80]
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + ldp x18, x19, [x0, #96]
> +#endif
> msr tpidr_el0, x2
> msr tpidrro_el0, x3
> msr contextidr_el1, x4
> --
> 2.23.0.866.gb869b98d4c-goog
>


--
Thanks,
~Nick Desaulniers

2019-10-19 08:50:01

by Steven Rostedt

Subject: Re: [PATCH 10/18] kprobes: fix compilation without CONFIG_KRETPROBES


Added Masami who's the maintainer of kprobes.

-- Steve


On Fri, 18 Oct 2019 09:10:25 -0700
Sami Tolvanen <[email protected]> wrote:

> kprobe_on_func_entry and arch_kprobe_on_func_entry need to be available
> even if CONFIG_KRETPROBES is not selected.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> kernel/kprobes.c | 38 +++++++++++++++++++-------------------
> 1 file changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 53534aa258a6..b5e20a4669b8 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -1829,6 +1829,25 @@ unsigned long __weak arch_deref_entry_point(void *entry)
> return (unsigned long)entry;
> }
>
> +bool __weak arch_kprobe_on_func_entry(unsigned long offset)
> +{
> + return !offset;
> +}
> +
> +bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
> +{
> + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
> +
> + if (IS_ERR(kp_addr))
> + return false;
> +
> + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
> + !arch_kprobe_on_func_entry(offset))
> + return false;
> +
> + return true;
> +}
> +
> #ifdef CONFIG_KRETPROBES
> /*
> * This kprobe pre_handler is registered with every kretprobe. When probe
> @@ -1885,25 +1904,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
> }
> NOKPROBE_SYMBOL(pre_handler_kretprobe);
>
> -bool __weak arch_kprobe_on_func_entry(unsigned long offset)
> -{
> - return !offset;
> -}
> -
> -bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
> -{
> - kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
> -
> - if (IS_ERR(kp_addr))
> - return false;
> -
> - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
> - !arch_kprobe_on_func_entry(offset))
> - return false;
> -
> - return true;
> -}
> -
> int register_kretprobe(struct kretprobe *rp)
> {
> int ret = 0;

2019-10-19 08:52:23

by Steven Rostedt

Subject: Re: [PATCH 11/18] kprobes: disable kretprobes with SCS


[ Added Masami ]

On Fri, 18 Oct 2019 09:10:26 -0700
Sami Tolvanen <[email protected]> wrote:

> With CONFIG_KRETPROBES, function return addresses are modified to
> redirect control flow to kretprobe_trampoline. This is incompatible with
> return address protection.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index a222adda8130..4646e3b34925 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -171,7 +171,7 @@ config ARCH_USE_BUILTIN_BSWAP
>
> config KRETPROBES
> def_bool y
> - depends on KPROBES && HAVE_KRETPROBES
> + depends on KPROBES && HAVE_KRETPROBES && ROP_PROTECTION_NONE

Again, this belongs in the arch code.

-- Steve

>
> config USER_RETURN_NOTIFIER
> bool


diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 41a9b4257b72..65557d7e6b5e 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -166,7 +166,7 @@ config ARM64
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
- select HAVE_KRETPROBES
+ select HAVE_KRETPROBES if ROP_PROTECTION_NONE
select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN

2019-10-19 08:52:53

by Sami Tolvanen

Subject: Re: [PATCH 13/18] arm64: preserve x18 when CPU is suspended

On Fri, Oct 18, 2019 at 9:49 AM Nick Desaulniers
<[email protected]> wrote:
> > diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> > index fdabf40a83c8..9a8bd4bc8549 100644
> > --- a/arch/arm64/mm/proc.S
> > +++ b/arch/arm64/mm/proc.S
> > @@ -73,6 +73,9 @@ alternative_endif
> > stp x8, x9, [x0, #48]
> > stp x10, x11, [x0, #64]
> > stp x12, x13, [x0, #80]
> > +#ifdef CONFIG_SHADOW_CALL_STACK
> > + stp x18, xzr, [x0, #96]
>
> Could this be a str/ldr of just x18 rather than stp/ldp of x18 +
> garbage? Maybe there's no real cost difference, or some kind of
> alignment invariant?

Sure, this can be changed to str/ldr. I don't think there's a
noticeable difference in cost.

Sami

2019-10-19 08:58:45

by Nick Desaulniers

Subject: Re: [PATCH 05/18] arm64: kbuild: reserve reg x18 from general allocation by the compiler

On Fri, Oct 18, 2019 at 9:11 AM Sami Tolvanen <[email protected]> wrote:
>
> From: Ard Biesheuvel <[email protected]>
>
> Before we can start using register x18 for a special purpose (as permitted
> by the AAPCS64 ABI), we need to tell the compiler that it is off limits
> for general allocation. So tag it as 'fixed',

yep, but...

> and remove the mention from
> the LL/SC compiler flag override.

was that cut/dropped from this patch?

>
> Link: https://patchwork.kernel.org/patch/9836881/

^ Looks like it. Maybe it doesn't matter, but if sending a V2, maybe
the commit message to be updated?

> Signed-off-by: Ard Biesheuvel <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>

If sending a V2 with the above cleaned up, you may also include:
Reviewed-by: Nick Desaulniers <[email protected]>

I like how this does not conditionally reserve it based on the CONFIG
for SCS. Hopefully later patches don't wrap it, but I haven't looked
through all of them yet.

> ---
> arch/arm64/Makefile | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 2c0238ce0551..1c7b276bc7c5 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -55,7 +55,7 @@ endif
>
> KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \
> $(compat_vdso) $(cc_has_k_constraint)
> -KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
> +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -ffixed-x18
> KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
> KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso)
>
> --
> 2.23.0.866.gb869b98d4c-goog
>


--
Thanks,
~Nick Desaulniers

2019-10-19 09:11:06

by Sami Tolvanen

Subject: Re: [PATCH 05/18] arm64: kbuild: reserve reg x18 from general allocation by the compiler

On Fri, Oct 18, 2019 at 10:32 AM 'Nick Desaulniers' via Clang Built
Linux <[email protected]> wrote:
> > and remove the mention from
> > the LL/SC compiler flag override.
>
> was that cut/dropped from this patch?
>
> >
> > Link: https://patchwork.kernel.org/patch/9836881/
>
> ^ Looks like it. Maybe it doesn't matter, but if sending a V2, maybe
> the commit message to be updated?

True. The original patch is from 2017 and the relevant part of
arm64/lib/Makefile no longer exists. I'll update this accordingly.

> I like how this does not conditionally reserve it based on the CONFIG
> for SCS. Hopefully later patches don't wrap it, but I haven't looked
> through all of them yet.

In a later patch x18 is only reserved with SCS. I'm fine with dropping
that patch and reserving it always, but wouldn't mind hearing thoughts
from the maintainers about this first.

Sami

2019-10-19 09:21:13

by Nick Desaulniers

Subject: Re: [PATCH 12/18] arm64: reserve x18 only with Shadow Call Stack

On Fri, Oct 18, 2019 at 9:11 AM 'Sami Tolvanen' via Clang Built Linux
<[email protected]> wrote:
>
> Only reserve x18 with CONFIG_SHADOW_CALL_STACK. Note that all external
> kernel modules must also have x18 reserved if the kernel uses SCS.

Ah, ok. The tradeoff for maintainers to consider, either:
1. one less GPR for ALL kernel code or
2. remember not to use x18 in inline asm lest you potentially break SCS

This patch is 2 (the earlier patch was 1). Maybe we don't write
enough inline asm that this will be hard to remember, and we do have
CI in Android to watch for this (on mainline, not sure about -next).

Either way,
Acked-by: Nick Desaulniers <[email protected]>

>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/Makefile | 6 +++++-
> 1 file changed, 5 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
> index 1c7b276bc7c5..ef76101201b2 100644
> --- a/arch/arm64/Makefile
> +++ b/arch/arm64/Makefile
> @@ -55,7 +55,7 @@ endif
>
> KBUILD_CFLAGS += -mgeneral-regs-only $(lseinstr) $(brokengasinst) \
> $(compat_vdso) $(cc_has_k_constraint)
> -KBUILD_CFLAGS += -fno-asynchronous-unwind-tables -ffixed-x18
> +KBUILD_CFLAGS += -fno-asynchronous-unwind-tables
> KBUILD_CFLAGS += $(call cc-disable-warning, psabi)
> KBUILD_AFLAGS += $(lseinstr) $(brokengasinst) $(compat_vdso)
>
> @@ -72,6 +72,10 @@ stack_protector_prepare: prepare0
> include/generated/asm-offsets.h))
> endif
>
> +ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
> +KBUILD_CFLAGS += -ffixed-x18
> +endif
> +
> ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
> KBUILD_CPPFLAGS += -mbig-endian
> CHECKFLAGS += -D__AARCH64EB__
> --
> 2.23.0.866.gb869b98d4c-goog
>



--
Thanks,
~Nick Desaulniers

2019-10-21 06:14:09

by Ard Biesheuvel

Subject: Re: [PATCH 05/18] arm64: kbuild: reserve reg x18 from general allocation by the compiler

On Fri, 18 Oct 2019 at 21:00, Sami Tolvanen <[email protected]> wrote:
>
> On Fri, Oct 18, 2019 at 10:32 AM 'Nick Desaulniers' via Clang Built
> Linux <[email protected]> wrote:
> > > and remove the mention from
> > > the LL/SC compiler flag override.
> >
> > was that cut/dropped from this patch?
> >
> > >
> > > Link: https://patchwork.kernel.org/patch/9836881/
> >
> > ^ Looks like it. Maybe it doesn't matter, but if sending a V2, maybe
> > the commit message to be updated?
>
> True. The original patch is from 2017 and the relevant part of
> arm64/lib/Makefile no longer exists. I'll update this accordingly.
>
> > I like how this does not conditionally reserve it based on the CONFIG
> > for SCS. Hopefully later patches don't wrap it, but I haven't looked
> > through all of them yet.
>
> In a later patch x18 is only reserved with SCS. I'm fine with dropping
> that patch and reserving it always, but wouldn't mind hearing thoughts
> from the maintainers about this first.
>

Why would you reserve x18 if SCS is disabled? Given that this is a
choice that is made at code generation time, there is no justification
for always reserving it, since it will never be used for anything. (Of
course, this applies to generated code only - .S files should simply
be updated to avoid x18 altogether)

Also, please combine this patch with the one that reserves it
conditionally, no point in having both in the same series.

2019-10-21 06:24:07

by Ard Biesheuvel

Subject: Re: [PATCH 14/18] arm64: efi: restore x18 if it was corrupted

On Fri, 18 Oct 2019 at 18:11, Sami Tolvanen <[email protected]> wrote:
>
> If we detect a corrupted x18 and SCS is enabled, restore the register
> before jumping back to instrumented code.
>

You'll have to elaborate a bit here and explain that this is
sufficient, given that we run EFI runtime services with interrupts
enabled.

> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/kernel/efi-rt-wrapper.S | 7 ++++++-
> 1 file changed, 6 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
> index 3fc71106cb2b..945744f16086 100644
> --- a/arch/arm64/kernel/efi-rt-wrapper.S
> +++ b/arch/arm64/kernel/efi-rt-wrapper.S
> @@ -34,5 +34,10 @@ ENTRY(__efi_rt_asm_wrapper)
> ldp x29, x30, [sp], #32
> b.ne 0f
> ret
> -0: b efi_handle_corrupted_x18 // tail call
> +0:
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + /* Restore x18 before returning to instrumented code. */
> + mov x18, x2
> +#endif
> + b efi_handle_corrupted_x18 // tail call
> ENDPROC(__efi_rt_asm_wrapper)
> --
> 2.23.0.866.gb869b98d4c-goog
>

2019-10-21 06:24:43

by Ard Biesheuvel

Subject: Re: [PATCH 16/18] arm64: kprobes: fix kprobes without CONFIG_KRETPROBES

On Fri, 18 Oct 2019 at 18:11, Sami Tolvanen <[email protected]> wrote:
>
> This allows CONFIG_KRETPROBES to be disabled without disabling
> kprobes entirely.
>
> Signed-off-by: Sami Tolvanen <[email protected]>

Can we make kretprobes work with the shadow call stack instead?

> ---
> arch/arm64/kernel/probes/kprobes.c | 2 ++
> 1 file changed, 2 insertions(+)
>
> diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
> index c4452827419b..98230ae979ca 100644
> --- a/arch/arm64/kernel/probes/kprobes.c
> +++ b/arch/arm64/kernel/probes/kprobes.c
> @@ -551,6 +551,7 @@ void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
> return (void *)orig_ret_address;
> }
>
> +#ifdef CONFIG_KRETPROBES
> void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
> struct pt_regs *regs)
> {
> @@ -564,6 +565,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
> {
> return 0;
> }
> +#endif
>
> int __init arch_init_kprobes(void)
> {
> --
> 2.23.0.866.gb869b98d4c-goog
>

2019-10-21 09:14:41

by Masami Hiramatsu

Subject: Re: [PATCH 10/18] kprobes: fix compilation without CONFIG_KRETPROBES

On Fri, 18 Oct 2019 13:02:57 -0400
Steven Rostedt <[email protected]> wrote:

>
> Added Masami who's the maintainer of kprobes.
>
> -- Steve
>
>
> On Fri, 18 Oct 2019 09:10:25 -0700
> Sami Tolvanen <[email protected]> wrote:
>
> > kprobe_on_func_entry and arch_kprobe_on_func_entry need to be available
> > even if CONFIG_KRETPROBES is not selected.

Good catch! Since nowadays all arches support kretprobes, I had missed
testing this.

Acked-by: Masami Hiramatsu <[email protected]>

Thank you,

> >
> > Signed-off-by: Sami Tolvanen <[email protected]>
> > ---
> > kernel/kprobes.c | 38 +++++++++++++++++++-------------------
> > 1 file changed, 19 insertions(+), 19 deletions(-)
> >
> > diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> > index 53534aa258a6..b5e20a4669b8 100644
> > --- a/kernel/kprobes.c
> > +++ b/kernel/kprobes.c
> > @@ -1829,6 +1829,25 @@ unsigned long __weak arch_deref_entry_point(void *entry)
> > return (unsigned long)entry;
> > }
> >
> > +bool __weak arch_kprobe_on_func_entry(unsigned long offset)
> > +{
> > + return !offset;
> > +}
> > +
> > +bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
> > +{
> > + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
> > +
> > + if (IS_ERR(kp_addr))
> > + return false;
> > +
> > + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
> > + !arch_kprobe_on_func_entry(offset))
> > + return false;
> > +
> > + return true;
> > +}
> > +
> > #ifdef CONFIG_KRETPROBES
> > /*
> > * This kprobe pre_handler is registered with every kretprobe. When probe
> > @@ -1885,25 +1904,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
> > }
> > NOKPROBE_SYMBOL(pre_handler_kretprobe);
> >
> > -bool __weak arch_kprobe_on_func_entry(unsigned long offset)
> > -{
> > - return !offset;
> > -}
> > -
> > -bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
> > -{
> > - kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
> > -
> > - if (IS_ERR(kp_addr))
> > - return false;
> > -
> > - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
> > - !arch_kprobe_on_func_entry(offset))
> > - return false;
> > -
> > - return true;
> > -}
> > -
> > int register_kretprobe(struct kretprobe *rp)
> > {
> > int ret = 0;
>


--
Masami Hiramatsu <[email protected]>

2019-10-21 09:17:43

by Masami Hiramatsu

Subject: Re: [PATCH 11/18] kprobes: disable kretprobes with SCS

On Fri, 18 Oct 2019 13:04:29 -0400
Steven Rostedt <[email protected]> wrote:

>
> [ Added Masami ]
>
> On Fri, 18 Oct 2019 09:10:26 -0700
> Sami Tolvanen <[email protected]> wrote:
>
> > With CONFIG_KRETPROBES, function return addresses are modified to
> > redirect control flow to kretprobe_trampoline. This is incompatible with
> > return address protection.
> >
> > Signed-off-by: Sami Tolvanen <[email protected]>
> > ---
> > arch/Kconfig | 2 +-
> > 1 file changed, 1 insertion(+), 1 deletion(-)
> >
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index a222adda8130..4646e3b34925 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -171,7 +171,7 @@ config ARCH_USE_BUILTIN_BSWAP
> >
> > config KRETPROBES
> > def_bool y
> > - depends on KPROBES && HAVE_KRETPROBES
> > + depends on KPROBES && HAVE_KRETPROBES && ROP_PROTECTION_NONE
>
> Again, this belongs in the arch code.

+1, below patch (from Steve) looks good to me.

Thank you,

>
> -- Steve
>
> >
> > config USER_RETURN_NOTIFIER
> > bool
>
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 41a9b4257b72..65557d7e6b5e 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -166,7 +166,7 @@ config ARM64
> select HAVE_STACKPROTECTOR
> select HAVE_SYSCALL_TRACEPOINTS
> select HAVE_KPROBES
> - select HAVE_KRETPROBES
> + select HAVE_KRETPROBES if ROP_PROTECTION_NONE
> select HAVE_GENERIC_VDSO
> select IOMMU_DMA if IOMMU_SUPPORT
> select IRQ_DOMAIN


--
Masami Hiramatsu <[email protected]>

2019-10-21 09:29:35

by Masami Hiramatsu

Subject: Re: [PATCH 00/18] add support for Clang's Shadow Call Stack

Hi,

On Fri, 18 Oct 2019 09:10:15 -0700
Sami Tolvanen <[email protected]> wrote:

> This patch series adds support for Clang's Shadow Call Stack (SCS)
> mitigation, which uses a separately allocated shadow stack to protect
> against return address overwrites. More information can be found here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html

Looks interesting, and like what function-graph tracing does...

>
> SCS is currently supported only on arm64, where the compiler requires
> the x18 register to be reserved for holding the current task's shadow
> stack pointer. Because of this, the series includes four patches from
> Ard to remove x18 usage from assembly code and to reserve the register
> from general allocation.
>
> With -fsanitize=shadow-call-stack, the compiler injects instructions
> to all non-leaf C functions to store the return address to the shadow
> stack and unconditionally load it again before returning. As a result,
> SCS is incompatible with features that rely on modifying function
> return addresses to alter control flow, such as function graph tracing
> and kretprobes. A copy of the return address is still kept in the
> kernel stack for compatibility with stack unwinding, for example.

Is it possible that kretprobes and function graph tracing modify the
SCS directly instead of changing the real stack in that case?

Thank you,

--
Masami Hiramatsu <[email protected]>

2019-10-21 16:08:21

by Kees Cook

Subject: Re: [PATCH 16/18] arm64: kprobes: fix kprobes without CONFIG_KRETPROBES

On Mon, Oct 21, 2019 at 08:21:48AM +0200, Ard Biesheuvel wrote:
> On Fri, 18 Oct 2019 at 18:11, Sami Tolvanen <[email protected]> wrote:
> >
> > This allows CONFIG_KRETPROBES to be disabled without disabling
> > kprobes entirely.
> >
> > Signed-off-by: Sami Tolvanen <[email protected]>
>
> Can we make kretprobes work with the shadow call stack instead?

I've been viewing that as "next steps". This series is the first step:
actually gaining the feature and clearly indicating where future
improvements can live.

--
Kees Cook

2019-10-21 16:58:16

by Mark Rutland

Subject: Re: [PATCH 13/18] arm64: preserve x18 when CPU is suspended

On Fri, Oct 18, 2019 at 09:10:28AM -0700, Sami Tolvanen wrote:
> Don't lose the current task's shadow stack when the CPU is suspended.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/mm/proc.S | 6 ++++++
> 1 file changed, 6 insertions(+)
>
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fdabf40a83c8..9a8bd4bc8549 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -73,6 +73,9 @@ alternative_endif
> stp x8, x9, [x0, #48]
> stp x10, x11, [x0, #64]
> stp x12, x13, [x0, #80]
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + stp x18, xzr, [x0, #96]
> +#endif

This should have a corresponding change to cpu_suspend_ctx in
<asm/suspend.h>. Otherwise we're corrupting a portion of the stack.

Mark.

> ret
> ENDPROC(cpu_do_suspend)
>
> @@ -89,6 +92,9 @@ ENTRY(cpu_do_resume)
> ldp x9, x10, [x0, #48]
> ldp x11, x12, [x0, #64]
> ldp x13, x14, [x0, #80]
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + ldp x18, x19, [x0, #96]
> +#endif
> msr tpidr_el0, x2
> msr tpidrro_el0, x3
> msr contextidr_el1, x4
> --
> 2.23.0.866.gb869b98d4c-goog
>

2019-10-21 20:44:08

by Sami Tolvanen

Subject: Re: [PATCH 05/18] arm64: kbuild: reserve reg x18 from general allocation by the compiler

On Sun, Oct 20, 2019 at 11:12 PM Ard Biesheuvel
<[email protected]> wrote:
> Also, please combine this patch with the one that reserves it
> conditionally, no point in having both in the same series.

Sure, I'll just drop this patch from v2 then and only reserve it with SCS.

Sami

2019-10-21 22:40:39

by Sami Tolvanen

Subject: Re: [PATCH 14/18] arm64: efi: restore x18 if it was corrupted

On Sun, Oct 20, 2019 at 11:20 PM Ard Biesheuvel
<[email protected]> wrote:
> You'll have to elaborate a bit here and explain that this is
> sufficient, given that we run EFI runtime services with interrupts
> enabled.

I can add a note about this in v2. This is called with preemption
disabled and we have a separate interrupt shadow stack, so as far as I
can tell, this should be sufficient. Did you have concerns about this?

Sami

2019-10-21 22:46:36

by Sami Tolvanen

Subject: Re: [PATCH 13/18] arm64: preserve x18 when CPU is suspended

On Mon, Oct 21, 2019 at 9:56 AM Mark Rutland <[email protected]> wrote:
> This should have a corresponding change to cpu_suspend_ctx in
> <asm/suspend.h>. Otherwise we're corrupting a portion of the stack.

Ugh, correct. I'll fix this in the next version. Thanks.

Sami
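
A minimal sketch of the <asm/suspend.h> change being discussed, assuming
one extra context slot to back the stp/ldp at offset #96 (the actual fix
in v2 may look different):

#include <linux/types.h>

#define NR_CTX_REGS	13	/* was 12: x2..x13, plus x18 for SCS */

/*
 * Must be kept in sync with cpu_do_suspend()/cpu_do_resume() in
 * arch/arm64/mm/proc.S, which now also save and restore x18.
 */
struct cpu_suspend_ctx {
	u64 ctx_regs[NR_CTX_REGS];
	u64 sp;
} __aligned(16);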

2019-10-22 05:55:20

by Ard Biesheuvel

Subject: Re: [PATCH 14/18] arm64: efi: restore x18 if it was corrupted

On Tue, 22 Oct 2019 at 00:40, Sami Tolvanen <[email protected]> wrote:
>
> On Sun, Oct 20, 2019 at 11:20 PM Ard Biesheuvel
> <[email protected]> wrote:
> > You'll have to elaborate a bit here and explain that this is
> > sufficient, given that we run EFI runtime services with interrupts
> > enabled.
>
> I can add a note about this in v2. This is called with preemption
> disabled and we have a separate interrupt shadow stack, so as far as I
> can tell, this should be sufficient. Did you have concerns about this?
>

No concerns, but we should put the above clarification in the commit log.

2019-10-22 16:31:07

by Mark Rutland

Subject: Re: [PATCH 12/18] arm64: reserve x18 only with Shadow Call Stack

On Fri, Oct 18, 2019 at 02:23:10PM -0700, Nick Desaulniers wrote:
> On Fri, Oct 18, 2019 at 9:11 AM 'Sami Tolvanen' via Clang Built Linux
> <[email protected]> wrote:
> >
> > Only reserve x18 with CONFIG_SHADOW_CALL_STACK. Note that all external
> > kernel modules must also have x18 reserved if the kernel uses SCS.
>
> Ah, ok. The tradeoff for maintainers to consider, either:
> 1. one less GPR for ALL kernel code or
> 2. remember not to use x18 in inline asm lest you potentially break SCS

This option only affects compiler-generated code, so I don't think that
matters.

I think it's fine to say that we should always avoid the use of x18 in
hand-written assembly (with manual register allocation), while also
allowing the compiler to use x18 if we're not using SCS.

This can be folded into the earlier patch which always reserved x18.

> This patch is 2 (the earlier patch was 1). Maybe we don't write
> enough inline asm that this will be hard to remember, and we do have
> CI in Android to watch for this (on mainline, not sure about -next).

I think that we can trust the set of people who regularly review arm64
assembly to remember this. We could also document this somewhere -- we
might need to document other constraints or conventions for assembly
in preparation for livepatching and so on.

If we wanted to, we could periodically grep for x18 to find any illicit
usage.

Thanks,
Mark.

2019-10-22 17:04:56

by Mark Rutland

Subject: Re: [PATCH 13/18] arm64: preserve x18 when CPU is suspended

On Mon, Oct 21, 2019 at 03:43:14PM -0700, Sami Tolvanen wrote:
> On Mon, Oct 21, 2019 at 9:56 AM Mark Rutland <[email protected]> wrote:
> > This should have a corresponding change to cpu_suspend_ctx in
> > <asm/suspend.h>. Otherwise we're corrupting a portion of the stack.
>
> Ugh, correct. I'll fix this in the next version. Thanks.

It's probably worth extending the comment above cpu_do_suspend to say:

| This must be kept in sync with struct cpu_suspend_ctx in
| <asm/suspend.h>

... to match what we have above struct cpu_suspend_ctx, and make this
more obvious in future.

Thanks,
Mark.

2019-10-22 21:27:26

by Kees Cook

Subject: Re: [PATCH 12/18] arm64: reserve x18 only with Shadow Call Stack

On Tue, Oct 22, 2019 at 05:00:10PM +0100, Mark Rutland wrote:
> If we wanted to, we could periodically grep for x18 to find any illicit
> usage.

Now we need objtool for arm64! :) (It seems CONFIG_HAVE_STACK_VALIDATION
is rather a narrow description for what objtool does now...)

--
Kees Cook

2019-10-25 19:13:02

by Sami Tolvanen

Subject: [PATCH v2 00/17] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer. Because of this, the series includes
patches from Ard to remove x18 usage from assembly code.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
to alter control flow, such as function graph tracing and
kretprobes, although it may be possible to later change these
features to modify the shadow stack instead. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions (see the sketch after this list)
- Changed the shadow stack overflow check for vmapped SCS to
use SCS_SIZE to avoid surprises when changing configs
- Renamed SCS_GFP to GFP_SCS
- Dropped the patch to reserve x18 unconditionally, it's now
only reserved with SCS
- Added a clarification why restoring x18 in the EFI RT
wrapper is safe
- Added a missing change to arch/arm64/include/asm/suspend.h,
and a comment to arch/arm64/mm/proc.S to remind that struct
cpu_suspend_ctx must be kept in sync with the code
- Moved x18 loading/storing during a context switch to
cpu_switch_to(), renamed scs_thread_switch() to
scs_overflow_check(), and removed the now unused scs_load()
- Added compile-time initialization for init_shadow_call_stack
and removed scs_set_init_magic()
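
A sketch of the __noscs annotation mentioned in the list above, assuming
it lives in include/linux/compiler-clang.h (the exact form in the series
may differ):

/* Sketch only: opt a function out of SCS instrumentation. */
#if __has_feature(shadow_call_stack)
# define __noscs	__attribute__((__no_sanitize__("shadow-call-stack")))
#else
# define __noscs
#endif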


Ard Biesheuvel (2):
arm64/lib: copy_page: avoid x18 register in assembler code
arm64: kernel: avoid x18 as an arbitrary temp register

Sami Tolvanen (15):
arm64: mm: don't use x18 in idmap_kpti_install_ng_mappings
arm64: kvm: stop treating register x18 as caller save
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
kprobes: fix compilation without CONFIG_KRETPROBES
arm64: disable function graph tracing with SCS
arm64: disable kretprobes with SCS
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: kprobes: fix kprobes without CONFIG_KRETPROBES
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack

Makefile | 6 +
arch/Kconfig | 33 +++++
arch/arm64/Kconfig | 9 +-
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/scs.h | 45 ++++++
arch/arm64/include/asm/stacktrace.h | 4 +
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/cpu-reset.S | 4 +-
arch/arm64/kernel/efi-rt-wrapper.S | 7 +-
arch/arm64/kernel/entry.S | 28 ++++
arch/arm64/kernel/head.S | 9 ++
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/probes/kprobes.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 39 +++++
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/kvm/hyp/Makefile | 3 +
arch/arm64/kvm/hyp/entry.S | 41 +++--
arch/arm64/lib/copy_page.S | 38 ++---
arch/arm64/mm/proc.S | 72 +++++----
drivers/base/node.c | 6 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 78 ++++++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/kprobes.c | 38 ++---
kernel/sched/core.c | 2 +
kernel/sched/sched.h | 1 +
kernel/scs.c | 214 +++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
39 files changed, 649 insertions(+), 97 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:04

by Sami Tolvanen

Subject: [PATCH v2 01/17] arm64: mm: don't use x18 in idmap_kpti_install_ng_mappings

idmap_kpti_install_ng_mappings uses x18 as a temporary register, which
will result in a conflict when x18 is reserved. Use x16 and x17 instead
where needed.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
---
arch/arm64/mm/proc.S | 63 ++++++++++++++++++++++----------------------
1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a1e0592d1fbc..fdabf40a83c8 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -250,15 +250,15 @@ ENTRY(idmap_kpti_install_ng_mappings)
/* We're the boot CPU. Wait for the others to catch up */
sevl
1: wfe
- ldaxr w18, [flag_ptr]
- eor w18, w18, num_cpus
- cbnz w18, 1b
+ ldaxr w17, [flag_ptr]
+ eor w17, w17, num_cpus
+ cbnz w17, 1b

/* We need to walk swapper, so turn off the MMU. */
pre_disable_mmu_workaround
- mrs x18, sctlr_el1
- bic x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ bic x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/* Everybody is enjoying the idmap, so we can rewrite swapper. */
@@ -281,9 +281,9 @@ skip_pgd:
isb

/* We're done: fire up the MMU again */
- mrs x18, sctlr_el1
- orr x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ orr x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/*
@@ -353,46 +353,47 @@ skip_pte:
b.ne do_pte
b next_pmd

+ .unreq cpu
+ .unreq num_cpus
+ .unreq swapper_pa
+ .unreq cur_pgdp
+ .unreq end_pgdp
+ .unreq pgd
+ .unreq cur_pudp
+ .unreq end_pudp
+ .unreq pud
+ .unreq cur_pmdp
+ .unreq end_pmdp
+ .unreq pmd
+ .unreq cur_ptep
+ .unreq end_ptep
+ .unreq pte
+
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
/* Uninstall swapper before surgery begins */
- __idmap_cpu_set_reserved_ttbr1 x18, x17
+ __idmap_cpu_set_reserved_ttbr1 x16, x17

/* Increment the flag to let the boot CPU we're ready */
-1: ldxr w18, [flag_ptr]
- add w18, w18, #1
- stxr w17, w18, [flag_ptr]
+1: ldxr w16, [flag_ptr]
+ add w16, w16, #1
+ stxr w17, w16, [flag_ptr]
cbnz w17, 1b

/* Wait for the boot CPU to finish messing around with swapper */
sevl
1: wfe
- ldxr w18, [flag_ptr]
- cbnz w18, 1b
+ ldxr w16, [flag_ptr]
+ cbnz w16, 1b

/* All done, act like nothing happened */
- offset_ttbr1 swapper_ttb, x18
+ offset_ttbr1 swapper_ttb, x16
msr ttbr1_el1, swapper_ttb
isb
ret

- .unreq cpu
- .unreq num_cpus
- .unreq swapper_pa
.unreq swapper_ttb
.unreq flag_ptr
- .unreq cur_pgdp
- .unreq end_pgdp
- .unreq pgd
- .unreq cur_pudp
- .unreq end_pudp
- .unreq pud
- .unreq cur_pmdp
- .unreq end_pmdp
- .unreq pmd
- .unreq cur_ptep
- .unreq end_ptep
- .unreq pte
ENDPROC(idmap_kpti_install_ng_mappings)
.popsection
#endif
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:07

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 02/17] arm64/lib: copy_page: avoid x18 register in assembler code

From: Ard Biesheuvel <[email protected]>

Register x18 will no longer be used as a caller save register in the
future, so stop using it in the copy_page() code.

Link: https://patchwork.kernel.org/patch/9836869/
Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/lib/copy_page.S | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index bbb8562396af..8b562264c165 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -34,45 +34,45 @@ alternative_else_nop_endif
ldp x14, x15, [x1, #96]
ldp x16, x17, [x1, #112]

- mov x18, #(PAGE_SIZE - 128)
+ add x0, x0, #256
add x1, x1, #128
1:
- subs x18, x18, #128
+ tst x0, #(PAGE_SIZE - 1)

alternative_if ARM64_HAS_NO_HW_PREFETCH
prfm pldl1strm, [x1, #384]
alternative_else_nop_endif

- stnp x2, x3, [x0]
+ stnp x2, x3, [x0, #-256]
ldp x2, x3, [x1]
- stnp x4, x5, [x0, #16]
+ stnp x4, x5, [x0, #-240]
ldp x4, x5, [x1, #16]
- stnp x6, x7, [x0, #32]
+ stnp x6, x7, [x0, #-224]
ldp x6, x7, [x1, #32]
- stnp x8, x9, [x0, #48]
+ stnp x8, x9, [x0, #-208]
ldp x8, x9, [x1, #48]
- stnp x10, x11, [x0, #64]
+ stnp x10, x11, [x0, #-192]
ldp x10, x11, [x1, #64]
- stnp x12, x13, [x0, #80]
+ stnp x12, x13, [x0, #-176]
ldp x12, x13, [x1, #80]
- stnp x14, x15, [x0, #96]
+ stnp x14, x15, [x0, #-160]
ldp x14, x15, [x1, #96]
- stnp x16, x17, [x0, #112]
+ stnp x16, x17, [x0, #-144]
ldp x16, x17, [x1, #112]

add x0, x0, #128
add x1, x1, #128

- b.gt 1b
+ b.ne 1b

- stnp x2, x3, [x0]
- stnp x4, x5, [x0, #16]
- stnp x6, x7, [x0, #32]
- stnp x8, x9, [x0, #48]
- stnp x10, x11, [x0, #64]
- stnp x12, x13, [x0, #80]
- stnp x14, x15, [x0, #96]
- stnp x16, x17, [x0, #112]
+ stnp x2, x3, [x0, #-256]
+ stnp x4, x5, [x0, #-240]
+ stnp x6, x7, [x0, #-224]
+ stnp x8, x9, [x0, #-208]
+ stnp x10, x11, [x0, #-192]
+ stnp x12, x13, [x0, #-176]
+ stnp x14, x15, [x0, #-160]
+ stnp x16, x17, [x0, #-144]

ret
ENDPROC(copy_page)
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:09

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 04/17] arm64: kernel: avoid x18 as an arbitrary temp register

From: Ard Biesheuvel <[email protected]>

The code in __cpu_soft_restart() uses x18 as an arbitrary temp register,
which will shortly be disallowed. So use x8 instead.

Link: https://patchwork.kernel.org/patch/9836877/
Signed-off-by: Ard Biesheuvel <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kernel/cpu-reset.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6ea337d464c4..32c7bf858dd9 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart)
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return

-1: mov x18, x1 // entry
+1: mov x8, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
- br x18
+ br x8
ENDPROC(__cpu_soft_restart)

.popsection
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:15

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 03/17] arm64: kvm: stop treating register x18 as caller save

In preparation for reserving x18, stop treating it as caller save in
the KVM guest entry/exit code. Currently, the code assumes there is
no need to preserve it for the host, given that it would have been
assumed clobbered anyway by the function call to __guest_enter().
Instead, preserve its value and restore it upon return.

Co-developed-by: Ard Biesheuvel <[email protected]>
Link: https://patchwork.kernel.org/patch/9836891/
[ updated commit message, switched from x18 to x29 for the guest context ]
Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kvm/hyp/entry.S | 41 +++++++++++++++++++-------------------
1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e5cc8d66bf53..c3c2d842c609 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -23,6 +23,7 @@
.pushsection .hyp.text, "ax"

.macro save_callee_saved_regs ctxt
+ str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -32,6 +33,8 @@
.endm

.macro restore_callee_saved_regs ctxt
+ // We assume \ctxt is not x18-x28
+ ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -48,7 +51,7 @@ ENTRY(__guest_enter)
// x0: vcpu
// x1: host context
// x2-x17: clobbered by macros
- // x18: guest context
+ // x29: guest context

// Store the host regs
save_callee_saved_regs x1
@@ -67,31 +70,28 @@ alternative_else_nop_endif
ret

1:
- add x18, x0, #VCPU_CONTEXT
+ add x29, x0, #VCPU_CONTEXT

// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
// as it may cause Pointer Authentication key signing mismatch errors
// when this feature is enabled for kernel code.
- ptrauth_switch_to_guest x18, x0, x1, x2
+ ptrauth_switch_to_guest x29, x0, x1, x2

// Restore guest regs x0-x17
- ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
- ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
- ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
- ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
- ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
- ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
- ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
- ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
- ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
-
- // Restore guest regs x19-x29, lr
- restore_callee_saved_regs x18
-
- // Restore guest reg x18
- ldr x18, [x18, #CPU_XREG_OFFSET(18)]
+ ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
+ ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
+ ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
+ ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
+ ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)]
+ ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)]
+ ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)]
+ ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)]
+ ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
+
+ // Restore guest regs x18-x29, lr
+ restore_callee_saved_regs x29

// Do not touch any register after this!
eret
@@ -114,7 +114,7 @@ ENTRY(__guest_exit)
// Retrieve the guest regs x0-x1 from the stack
ldp x2, x3, [sp], #16 // x0, x1

- // Store the guest regs x0-x1 and x4-x18
+ // Store the guest regs x0-x1 and x4-x17
stp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
stp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
@@ -123,9 +123,8 @@ ENTRY(__guest_exit)
stp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
- str x18, [x1, #CPU_XREG_OFFSET(18)]

- // Store the guest regs x19-x29, lr
+ // Store the guest regs x18-x29, lr
save_callee_saved_regs x1

get_host_ctxt x2, x3
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:25

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 06/17] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 296546ffed6c..111e58ec231e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bda20282746b..fcb8c1708f9e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 383d29e8c199..b9e6e225254f 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -7,9 +7,11 @@

#include <linux/cpuhotplug.h>
#include <linux/mm.h>
+#include <linux/mmzone.h>
#include <linux/slab.h>
#include <linux/scs.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -59,6 +61,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -92,6 +99,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -121,6 +133,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -131,6 +149,7 @@ int scs_prepare(struct task_struct *tsk, int node)

task_set_scs(tsk, s);
scs_set_magic(tsk);
+ scs_account(tsk, 1);

return 0;
}
@@ -150,6 +169,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
scs_task_init(tsk);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ecc3dbad606b..fe17d69d98a7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5361,6 +5361,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5382,6 +5385,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6afc892a148a..9fe4afe670fe 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1118,6 +1118,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:26

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 07/17] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index b9e6e225254f..a5bf7d12dc13 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -154,6 +154,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(tsk);
+ uintptr_t s = (uintptr_t)p;
+
+ while (p < end && *p)
+ p++;
+
+ return (uintptr_t)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s: highest shadow stack usage %lu bytes\n",
+ __func__, used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
return *scs_magic(tsk) != SCS_END_MAGIC;
@@ -168,6 +206,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
scs_task_init(tsk);
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:42

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 14/17] arm64: vdso: disable Shadow Call Stack

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:47

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 17/17] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 45 ++++++++++++++++++++++++++++
arch/arm64/include/asm/stacktrace.h | 4 +++
arch/arm64/include/asm/thread_info.h | 3 ++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 ++
arch/arm64/kernel/entry.S | 28 +++++++++++++++++
arch/arm64/kernel/head.S | 9 ++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 39 ++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
12 files changed, 145 insertions(+)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 42867174920f..f4c94c5e8012 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -66,6 +66,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -948,6 +949,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..76dda1228935
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void)
+{
+}
+
+static inline void scs_save(struct task_struct *tsk)
+{
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4d9b1f48dc39..b6cf32fb4efe 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -68,6 +68,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);

DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);

+#ifdef CONFIG_SHADOW_CALL_STACK
+DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#endif
+
static inline bool on_irq_stack(unsigned long sp,
struct stack_info *info)
{
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..8c73764b9ed2 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 478491f07b4f..b3995329d9e5 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 214685760e1c..f6762b9ae1e1 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf3bd2976e57..12a5bc209280 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -172,6 +172,10 @@ alternative_cb_end

apply_ssbd 1, x22, x23

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS]
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -278,6 +282,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -383,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x20, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -400,6 +413,12 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

@@ -409,6 +428,10 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* x20 is also preserved */
+ mov x18, x20
+#endif
.endm

/* GPRs used by entry code */
@@ -1155,6 +1178,11 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS]
+#endif
ret
ENDPROC(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..2be977c6496f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -424,6 +425,10 @@ __primary_switched:
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -731,6 +736,10 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // Set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS]
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 71f788cd2b18..5f0aec285848 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -507,6 +508,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
uao_thread_switch(next);
ptrauth_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..6f255072c9a9
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(SCS_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc9fe879c279..cc1938a585d2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -44,6 +44,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -357,6 +358,9 @@ void cpu_die(void)
{
unsigned int cpu = smp_processor_id();

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:13:53

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 16/17] arm64: disable SCS for hypervisor code

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kvm/hyp/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index ea710f674cb6..8289ea086e5e 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -28,3 +28,6 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+
+ORIG_CFLAGS := $(KBUILD_CFLAGS)
+KBUILD_CFLAGS = $(subst $(CC_FLAGS_SCS),,$(ORIG_CFLAGS))
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:14:42

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 15/17] arm64: kprobes: fix kprobes without CONFIG_KRETPROBES

This allows CONFIG_KRETPROBES to be disabled without disabling
kprobes entirely.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kernel/probes/kprobes.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c4452827419b..98230ae979ca 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -551,6 +551,7 @@ void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
return (void *)orig_ret_address;
}

+#ifdef CONFIG_KRETPROBES
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -564,6 +565,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
{
return 0;
}
+#endif

int __init arch_init_kprobes(void)
{
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:15:21

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html
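
For reference, the instrumentation the compiler emits on arm64 with
-fsanitize=shadow-call-stack and -ffixed-x18 looks roughly like this
(a simplified sketch; the exact code generation is up to Clang):

  function:
        str     x30, [x18], #8          // push the return address to the
                                        // shadow stack and advance x18
        stp     x29, x30, [sp, #-16]!   // normal frame setup is unchanged
        ...
        ldp     x29, x30, [sp], #16
        ldr     x30, [x18, #-8]!        // reload the return address from
                                        // the shadow stack before returning
        ret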

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 33 +++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 78 +++++++++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/sched/sched.h | 1 +
kernel/scs.c | 155 +++++++++++++++++++++++++++++++++
11 files changed, 303 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 5475cdb6d57d..2b5c59fb18f2 100644
--- a/Makefile
+++ b/Makefile
@@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 5f8a5d84dbbe..5e34cbcd8d6a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK_VMAP
+ bool
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found from
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable reading and writing arbitrary memory may
+ be able to locate them and hijack control flow by modifying shadow
+ stacks that are not currently in use.
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..afe5e24088b2 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((no_sanitize("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..c8b0ccfdd803
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2018 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+#define SCS_SIZE 1024
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+static inline void *task_scs(struct task_struct *tsk)
+{
+ return task_thread_info(tsk)->shadow_call_stack;
+}
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_thread_info(tsk)->shadow_call_stack = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_init(struct task_struct *tsk);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void *task_scs(struct task_struct *tsk)
+{
+ return 0;
+}
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+}
+
+static inline void scs_init(void)
+{
+}
+
+static inline void scs_task_init(struct task_struct *tsk)
+{
+}
+
+static inline void scs_task_reset(struct task_struct *tsk)
+{
+}
+
+static inline int scs_prepare(struct task_struct *tsk, int node)
+{
+ return 0;
+}
+
+static inline bool scs_corrupted(struct task_struct *tsk)
+{
+ return false;
+}
+
+static inline void scs_release(struct task_struct *tsk)
+{
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index 9e5cbe5eab7b..cbd40460e903 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index daad787fb795..313dbd44d576 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index bcdf53125210..ae7ebe9f0586 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <linux/stackleak.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -834,6 +837,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -907,6 +912,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
clear_user_return_notifier(tsk);
clear_tsk_need_resched(tsk);
set_task_stack_end_magic(tsk);
+ scs_task_init(tsk);

#ifdef CONFIG_STACKPROTECTOR
tsk->stack_canary = get_random_canary();
@@ -2022,6 +2028,9 @@ static __latent_entropy struct task_struct *copy_process(
args->tls);
if (retval)
goto bad_fork_cleanup_io;
+ retval = scs_prepare(p, node);
+ if (retval)
+ goto bad_fork_cleanup_thread;

stackleak_task_init(p);

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd05a378631a..e7faeb383008 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6013,6 +6013,8 @@ void init_idle(struct task_struct *idle, int cpu)
raw_spin_lock_irqsave(&idle->pi_lock, flags);
raw_spin_lock(&rq->lock);

+ scs_task_reset(idle);
+
__sched_fork(0, idle);
idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0db2c1b3361e..c153003a011c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -58,6 +58,7 @@
#include <linux/profile.h>
#include <linux/psi.h>
#include <linux/rcupdate_wait.h>
+#include <linux/scs.h>
#include <linux/security.h>
#include <linux/stop_machine.h>
#include <linux/suspend.h>
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..383d29e8c199
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/scs.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Keep a cache of shadow stacks */
+#define SCS_CACHE_SIZE 2
+static DEFINE_PER_CPU(void *, scs_cache[SCS_CACHE_SIZE]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++) {
+ void *s;
+
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ return s;
+ }
+ }
+
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ return __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++) {
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) != 0)
+ continue;
+
+ return;
+ }
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ return kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+}
+
+static inline void scs_free(void *s)
+{
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static inline unsigned long *scs_magic(struct task_struct *tsk)
+{
+ return (unsigned long *)(__scs_base(tsk) + SCS_SIZE - sizeof(long));
+}
+
+static inline void scs_set_magic(struct task_struct *tsk)
+{
+ *scs_magic(tsk) = SCS_END_MAGIC;
+}
+
+void scs_task_init(struct task_struct *tsk)
+{
+ task_set_scs(tsk, NULL);
+}
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ scs_set_magic(tsk);
+
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ return *scs_magic(tsk) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ scs_task_init(tsk);
+ scs_free(s);
+}
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:15:31

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 10/17] arm64: disable kretprobes with SCS

With CONFIG_KRETPROBES, function return addresses are modified to
redirect control flow to kretprobe_trampoline. This is incompatible
with SCS.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8cda176dad9a..42867174920f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -165,7 +165,7 @@ config ARM64
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
- select HAVE_KRETPROBES
+ select HAVE_KRETPROBES if !SHADOW_CALL_STACK
select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:15:30

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 09/17] arm64: disable function graph tracing with SCS

With CONFIG_FUNCTION_GRAPH_TRACER, function return addresses are
modified in ftrace_graph_caller and prepare_ftrace_return to redirect
control flow to ftrace_return_to_handler. This is incompatible with
SCS.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3f047afb982c..8cda176dad9a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -148,7 +148,7 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:15:33

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 08/17] kprobes: fix compilation without CONFIG_KRETPROBES

kprobe_on_func_entry and arch_kprobe_on_func_entry need to be available
even if CONFIG_KRETPROBES is not selected.

Signed-off-by: Sami Tolvanen <[email protected]>
Acked-by: Masami Hiramatsu <[email protected]>
---
kernel/kprobes.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 53534aa258a6..b5e20a4669b8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1829,6 +1829,25 @@ unsigned long __weak arch_deref_entry_point(void *entry)
return (unsigned long)entry;
}

+bool __weak arch_kprobe_on_func_entry(unsigned long offset)
+{
+ return !offset;
+}
+
+bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
+{
+ kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
+
+ if (IS_ERR(kp_addr))
+ return false;
+
+ if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
+ !arch_kprobe_on_func_entry(offset))
+ return false;
+
+ return true;
+}
+
#ifdef CONFIG_KRETPROBES
/*
* This kprobe pre_handler is registered with every kretprobe. When probe
@@ -1885,25 +1904,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);

-bool __weak arch_kprobe_on_func_entry(unsigned long offset)
-{
- return !offset;
-}
-
-bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
-{
- kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
-
- if (IS_ERR(kp_addr))
- return false;
-
- if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
- !arch_kprobe_on_func_entry(offset))
- return false;
-
- return true;
-}
-
int register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:15:38

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 12/17] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 9 +++++++++
2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index fdabf40a83c8..0e7c353c9dfd 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -49,6 +49,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -73,6 +75,9 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #96]
+#endif
ret
ENDPROC(cpu_do_suspend)

@@ -89,6 +94,10 @@ ENTRY(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
+#endif
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:15:41

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 13/17] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18 and SCS is enabled, restore the register
before jumping back to instrumented code. This is safe, because the
wrapper is called with preemption disabled and a separate shadow stack
is used for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..945744f16086 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,10 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Restore x18 before returning to instrumented code. */
+ mov x18, x2
+#endif
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 19:18:14

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v2 16/17] arm64: disable SCS for hypervisor code

On Thu, 24 Oct 2019 15:51:31 -0700
[email protected] wrote:

Suggested-by: Steven Rostedt (VMware) <[email protected]>

;-)

> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/kvm/hyp/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> index ea710f674cb6..8289ea086e5e 100644
> --- a/arch/arm64/kvm/hyp/Makefile
> +++ b/arch/arm64/kvm/hyp/Makefile
> @@ -28,3 +28,6 @@ GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> KCOV_INSTRUMENT := n
> +
> +ORIG_CFLAGS := $(KBUILD_CFLAGS)
> +KBUILD_CFLAGS = $(subst $(CC_FLAGS_SCS),,$(ORIG_CFLAGS))

May want a comment above that that states:

# remove the SCS flags from all objects in this directory

-- Steve

2019-10-25 19:18:31

by Masahiro Yamada

[permalink] [raw]
Subject: Re: [PATCH v2 16/17] arm64: disable SCS for hypervisor code

On Fri, Oct 25, 2019 at 7:52 AM <[email protected]> wrote:
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/kvm/hyp/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> index ea710f674cb6..8289ea086e5e 100644
> --- a/arch/arm64/kvm/hyp/Makefile
> +++ b/arch/arm64/kvm/hyp/Makefile
> @@ -28,3 +28,6 @@ GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> KCOV_INSTRUMENT := n
> +
> +ORIG_CFLAGS := $(KBUILD_CFLAGS)
> +KBUILD_CFLAGS = $(subst $(CC_FLAGS_SCS),,$(ORIG_CFLAGS))


$(subst ... ) is not the correct use here.

It works like sed, s/$(CC_FLAGS_SCS)//
instead of matching by word.




KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))

is more correct, and simpler.
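
For example, with a made-up flag list (just to show the difference):

FLAGS := -O2 -fsanitize=shadow-call-stack

# word-based removal; prints "-O2"
$(info $(filter-out -fsanitize=shadow-call-stack,$(FLAGS)))

# plain substring replacement; prints "-O2 " (stray space) and would
# also clip the text out of any longer flag that merely contains it
$(info $(subst -fsanitize=shadow-call-stack,,$(FLAGS)))

all: ;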




--
Best Regards
Masahiro Yamada

2019-10-25 19:20:58

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v2 16/17] arm64: disable SCS for hypervisor code

On Fri, 25 Oct 2019 10:29:47 +0900
Masahiro Yamada <[email protected]> wrote:

> On Fri, Oct 25, 2019 at 7:52 AM <[email protected]> wrote:
> >
> > Signed-off-by: Sami Tolvanen <[email protected]>
> > ---
> > arch/arm64/kvm/hyp/Makefile | 3 +++
> > 1 file changed, 3 insertions(+)
> >
> > diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> > index ea710f674cb6..8289ea086e5e 100644
> > --- a/arch/arm64/kvm/hyp/Makefile
> > +++ b/arch/arm64/kvm/hyp/Makefile
> > @@ -28,3 +28,6 @@ GCOV_PROFILE := n
> > KASAN_SANITIZE := n
> > UBSAN_SANITIZE := n
> > KCOV_INSTRUMENT := n
> > +
> > +ORIG_CFLAGS := $(KBUILD_CFLAGS)
> > +KBUILD_CFLAGS = $(subst $(CC_FLAGS_SCS),,$(ORIG_CFLAGS))
>
>
> $(subst ... ) is not the correct use here.
>
> It works like sed, s/$(CC_CFLAGS_SCS)//
> instead of matching by word.
>
>
>
>
> KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
>
> is more correct, and simpler.

I guess that would work too. Not sure why I never used it. I see mips
used it for their -pg flags.

-- Steve

2019-10-25 19:33:49

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 01/17] arm64: mm: don't use x18 in idmap_kpti_install_ng_mappings

On Thu, Oct 24, 2019 at 03:51:16PM -0700, [email protected] wrote:
> idmap_kpti_install_ng_mappings uses x18 as a temporary register, which
> will result in a conflict when x18 is reserved. Use x16 and x17 instead
> where needed.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Nick Desaulniers <[email protected]>

AFAICT the new register assignment is sound, so FWIW:

Reviewed-by: Mark Rutland <[email protected]>

I was going to suggest adding mnemonics for the remaining raw register
names, but after having a go locally I think it's cleaner as-is given
the registers are used in different widths for multiple purposes.

Thanks,
Mark.

> ---
> arch/arm64/mm/proc.S | 63 ++++++++++++++++++++++----------------------
> 1 file changed, 32 insertions(+), 31 deletions(-)
>
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index a1e0592d1fbc..fdabf40a83c8 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -250,15 +250,15 @@ ENTRY(idmap_kpti_install_ng_mappings)
> /* We're the boot CPU. Wait for the others to catch up */
> sevl
> 1: wfe
> - ldaxr w18, [flag_ptr]
> - eor w18, w18, num_cpus
> - cbnz w18, 1b
> + ldaxr w17, [flag_ptr]
> + eor w17, w17, num_cpus
> + cbnz w17, 1b
>
> /* We need to walk swapper, so turn off the MMU. */
> pre_disable_mmu_workaround
> - mrs x18, sctlr_el1
> - bic x18, x18, #SCTLR_ELx_M
> - msr sctlr_el1, x18
> + mrs x17, sctlr_el1
> + bic x17, x17, #SCTLR_ELx_M
> + msr sctlr_el1, x17
> isb
>
> /* Everybody is enjoying the idmap, so we can rewrite swapper. */
> @@ -281,9 +281,9 @@ skip_pgd:
> isb
>
> /* We're done: fire up the MMU again */
> - mrs x18, sctlr_el1
> - orr x18, x18, #SCTLR_ELx_M
> - msr sctlr_el1, x18
> + mrs x17, sctlr_el1
> + orr x17, x17, #SCTLR_ELx_M
> + msr sctlr_el1, x17
> isb
>
> /*
> @@ -353,46 +353,47 @@ skip_pte:
> b.ne do_pte
> b next_pmd
>
> + .unreq cpu
> + .unreq num_cpus
> + .unreq swapper_pa
> + .unreq cur_pgdp
> + .unreq end_pgdp
> + .unreq pgd
> + .unreq cur_pudp
> + .unreq end_pudp
> + .unreq pud
> + .unreq cur_pmdp
> + .unreq end_pmdp
> + .unreq pmd
> + .unreq cur_ptep
> + .unreq end_ptep
> + .unreq pte
> +
> /* Secondary CPUs end up here */
> __idmap_kpti_secondary:
> /* Uninstall swapper before surgery begins */
> - __idmap_cpu_set_reserved_ttbr1 x18, x17
> + __idmap_cpu_set_reserved_ttbr1 x16, x17
>
> /* Increment the flag to let the boot CPU we're ready */
> -1: ldxr w18, [flag_ptr]
> - add w18, w18, #1
> - stxr w17, w18, [flag_ptr]
> +1: ldxr w16, [flag_ptr]
> + add w16, w16, #1
> + stxr w17, w16, [flag_ptr]
> cbnz w17, 1b
>
> /* Wait for the boot CPU to finish messing around with swapper */
> sevl
> 1: wfe
> - ldxr w18, [flag_ptr]
> - cbnz w18, 1b
> + ldxr w16, [flag_ptr]
> + cbnz w16, 1b
>
> /* All done, act like nothing happened */
> - offset_ttbr1 swapper_ttb, x18
> + offset_ttbr1 swapper_ttb, x16
> msr ttbr1_el1, swapper_ttb
> isb
> ret
>
> - .unreq cpu
> - .unreq num_cpus
> - .unreq swapper_pa
> .unreq swapper_ttb
> .unreq flag_ptr
> - .unreq cur_pgdp
> - .unreq end_pgdp
> - .unreq pgd
> - .unreq cur_pudp
> - .unreq end_pudp
> - .unreq pud
> - .unreq cur_pmdp
> - .unreq end_pmdp
> - .unreq pmd
> - .unreq cur_ptep
> - .unreq end_ptep
> - .unreq pte
> ENDPROC(idmap_kpti_install_ng_mappings)
> .popsection
> #endif
> --
> 2.24.0.rc0.303.g954a862665-goog
>

2019-10-25 19:37:05

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 02/17] arm64/lib: copy_page: avoid x18 register in assembler code

On Thu, Oct 24, 2019 at 03:51:17PM -0700, [email protected] wrote:
> From: Ard Biesheuvel <[email protected]>
>
> Register x18 will no longer be used as a caller save register in the
> future, so stop using it in the copy_page() code.
>
> Link: https://patchwork.kernel.org/patch/9836869/
> Signed-off-by: Ard Biesheuvel <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/lib/copy_page.S | 38 +++++++++++++++++++-------------------
> 1 file changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
> index bbb8562396af..8b562264c165 100644
> --- a/arch/arm64/lib/copy_page.S
> +++ b/arch/arm64/lib/copy_page.S
> @@ -34,45 +34,45 @@ alternative_else_nop_endif
> ldp x14, x15, [x1, #96]
> ldp x16, x17, [x1, #112]
>
> - mov x18, #(PAGE_SIZE - 128)
> + add x0, x0, #256
> add x1, x1, #128
> 1:
> - subs x18, x18, #128
> + tst x0, #(PAGE_SIZE - 1)
>
> alternative_if ARM64_HAS_NO_HW_PREFETCH
> prfm pldl1strm, [x1, #384]
> alternative_else_nop_endif
>
> - stnp x2, x3, [x0]
> + stnp x2, x3, [x0, #-256]
> ldp x2, x3, [x1]
> - stnp x4, x5, [x0, #16]
> + stnp x4, x5, [x0, #-240]
> ldp x4, x5, [x1, #16]

For legibility, could we make the offset and bias explicit in the STNPs
so that these line up? e.g.

stnp x4, x5, [x0, #16 - 256]
ldp x4, x5, [x1, #16]

... that'd make it much easier to see by eye that this is sound, much as
I trust my mental arithmetic. ;)

> - stnp x6, x7, [x0, #32]
> + stnp x6, x7, [x0, #-224]
> ldp x6, x7, [x1, #32]
> - stnp x8, x9, [x0, #48]
> + stnp x8, x9, [x0, #-208]
> ldp x8, x9, [x1, #48]
> - stnp x10, x11, [x0, #64]
> + stnp x10, x11, [x0, #-192]
> ldp x10, x11, [x1, #64]
> - stnp x12, x13, [x0, #80]
> + stnp x12, x13, [x0, #-176]
> ldp x12, x13, [x1, #80]
> - stnp x14, x15, [x0, #96]
> + stnp x14, x15, [x0, #-160]
> ldp x14, x15, [x1, #96]
> - stnp x16, x17, [x0, #112]
> + stnp x16, x17, [x0, #-144]
> ldp x16, x17, [x1, #112]
>
> add x0, x0, #128
> add x1, x1, #128
>
> - b.gt 1b
> + b.ne 1b
>
> - stnp x2, x3, [x0]
> - stnp x4, x5, [x0, #16]
> - stnp x6, x7, [x0, #32]
> - stnp x8, x9, [x0, #48]
> - stnp x10, x11, [x0, #64]
> - stnp x12, x13, [x0, #80]
> - stnp x14, x15, [x0, #96]
> - stnp x16, x17, [x0, #112]
> + stnp x2, x3, [x0, #-256]
> + stnp x4, x5, [x0, #-240]
> + stnp x6, x7, [x0, #-224]
> + stnp x8, x9, [x0, #-208]
> + stnp x10, x11, [x0, #-192]
> + stnp x12, x13, [x0, #-176]
> + stnp x14, x15, [x0, #-160]
> + stnp x16, x17, [x0, #-144]

... likewise here:

stnp xt1, xt2, [x0, #off - 256]

I don't see a nicer way to write this sequence without using an
additional register, so with those changes:

Reviewed-by: Mark Rutland <[email protected]>

Thanks,
Mark.

>
> ret
> ENDPROC(copy_page)
> --
> 2.24.0.rc0.303.g954a862665-goog
>

2019-10-25 19:43:25

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Thu, Oct 24, 2019 at 03:51:20PM -0700, [email protected] wrote:
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> Note that security guarantees in the kernel differ from the
> ones documented for user space. The kernel must store addresses
> of shadow stacks used by other tasks and interrupt handlers in
> memory, which means an attacker capable reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> Makefile | 6 ++
> arch/Kconfig | 33 +++++++
> include/linux/compiler-clang.h | 6 ++
> include/linux/compiler_types.h | 4 +
> include/linux/scs.h | 78 +++++++++++++++++
> init/init_task.c | 8 ++
> kernel/Makefile | 1 +
> kernel/fork.c | 9 ++
> kernel/sched/core.c | 2 +
> kernel/sched/sched.h | 1 +
> kernel/scs.c | 155 +++++++++++++++++++++++++++++++++
> 11 files changed, 303 insertions(+)
> create mode 100644 include/linux/scs.h
> create mode 100644 kernel/scs.c
>
> diff --git a/Makefile b/Makefile
> index 5475cdb6d57d..2b5c59fb18f2 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
> KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
> endif
>
> +ifdef CONFIG_SHADOW_CALL_STACK
> +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> +export CC_FLAGS_SCS
> +endif
> +
> # arch Makefile may override CC so keep this after arch Makefile is included
> NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 5f8a5d84dbbe..5e34cbcd8d6a 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
> about 20% of all kernel functions, which increases the kernel code
> size by about 2%.
>
> +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> + bool
> + help
> + An architecture should select this if it supports Clang's Shadow
> + Call Stack, has asm/scs.h, and implements runtime support for shadow
> + stack switching.
> +
> +config SHADOW_CALL_STACK_VMAP
> + bool
> + depends on SHADOW_CALL_STACK
> + help
> + Use virtually mapped shadow call stacks. Selecting this option
> + provides better stack exhaustion protection, but increases per-thread
> + memory consumption as a full page is allocated for each shadow stack.
> +
> +config SHADOW_CALL_STACK
> + bool "Clang Shadow Call Stack"
> + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> + help
> + This option enables Clang's Shadow Call Stack, which uses a
> + shadow stack to protect function return addresses from being
> + overwritten by an attacker. More information can be found from
> + Clang's documentation:
> +
> + https://clang.llvm.org/docs/ShadowCallStack.html
> +
> + Note that security guarantees in the kernel differ from the ones
> + documented for user space. The kernel must store addresses of shadow
> + stacks used by other tasks and interrupt handlers in memory, which
> + means an attacker capable reading and writing arbitrary memory may
> + be able to locate them and hijack control flow by modifying shadow
> + stacks that are not currently in use.
> +
> config HAVE_ARCH_WITHIN_STACK_FRAMES
> bool
> help
> diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
> index 333a6695a918..afe5e24088b2 100644
> --- a/include/linux/compiler-clang.h
> +++ b/include/linux/compiler-clang.h
> @@ -42,3 +42,9 @@
> * compilers, like ICC.
> */
> #define barrier() __asm__ __volatile__("" : : : "memory")
> +
> +#if __has_feature(shadow_call_stack)
> +# define __noscs __attribute__((no_sanitize("shadow-call-stack")))
> +#else
> +# define __noscs
> +#endif

Huh. I didn't realise it was valid to have a space after the `#` like
this. I see we're very inconsistent about style on that front, so this
is fine, I'll just have to get used to it. :)

> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 72393a8c1a6c..be5d5be4b1ae 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -202,6 +202,10 @@ struct ftrace_likely_data {
> # define randomized_struct_fields_end
> #endif
>
> +#ifndef __noscs
> +# define __noscs
> +#endif
> +
> #ifndef asm_volatile_goto
> #define asm_volatile_goto(x...) asm goto(x)
> #endif
> diff --git a/include/linux/scs.h b/include/linux/scs.h
> new file mode 100644
> index 000000000000..c8b0ccfdd803
> --- /dev/null
> +++ b/include/linux/scs.h
> @@ -0,0 +1,78 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2018 Google LLC
> + */
> +
> +#ifndef _LINUX_SCS_H
> +#define _LINUX_SCS_H
> +
> +#include <linux/gfp.h>
> +#include <linux/sched.h>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +
> +#define SCS_SIZE 1024

I think it'd be worth a comment on how this size was chosen. IIRC this
was empirical?

> +#define SCS_END_MAGIC 0xaf0194819b1635f6UL

Keyboard smash? ... or is there a prize for whoever figures out the
secret? ;)

> +
> +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
> +
> +static inline void *task_scs(struct task_struct *tsk)
> +{
> + return task_thread_info(tsk)->shadow_call_stack;
> +}
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s)
> +{
> + task_thread_info(tsk)->shadow_call_stack = s;
> +}

This should probably be named get and set, or have:

#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)

... which can have a trivial implementation as NULL for the !SCS case.

> +
> +extern void scs_init(void);
> +extern void scs_task_init(struct task_struct *tsk);
> +extern void scs_task_reset(struct task_struct *tsk);
> +extern int scs_prepare(struct task_struct *tsk, int node);
> +extern bool scs_corrupted(struct task_struct *tsk);
> +extern void scs_release(struct task_struct *tsk);
> +
> +#else /* CONFIG_SHADOW_CALL_STACK */
> +
> +static inline void *task_scs(struct task_struct *tsk)
> +{
> + return 0;
> +}

For all the trivial wrappers you can put the implementation on the same
line as the prototype. That makes it a bit easier to compare against the
prototypes on the other side of the ifdeffery.

e.g. this lot can be:

static inline void *task_scs(struct task_struct *tsk) { return 0; }
static inline void task_set_scs(struct task_struct *tsk, void *s) { }
static inline void scs_init(void) { }
...

> +#endif /* CONFIG_SHADOW_CALL_STACK */
> +
> +#endif /* _LINUX_SCS_H */
> diff --git a/init/init_task.c b/init/init_task.c
> index 9e5cbe5eab7b..cbd40460e903 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -11,6 +11,7 @@
> #include <linux/mm.h>
> #include <linux/audit.h>
> #include <linux/numa.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <linux/uaccess.h>
> @@ -184,6 +185,13 @@ struct task_struct init_task
> };
> EXPORT_SYMBOL(init_task);
>
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
> + __aligned(SCS_SIZE) = {
> + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
> +};
> +#endif
> +
> /*
> * Initial thread structure. Alignment of this is handled by a special
> * linker map entry.
> diff --git a/kernel/Makefile b/kernel/Makefile
> index daad787fb795..313dbd44d576 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
> obj-$(CONFIG_IRQ_WORK) += irq_work.o
> obj-$(CONFIG_CPU_PM) += cpu_pm.o
> obj-$(CONFIG_BPF) += bpf/
> +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
>
> obj-$(CONFIG_PERF_EVENTS) += events/
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index bcdf53125210..ae7ebe9f0586 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -94,6 +94,7 @@
> #include <linux/livepatch.h>
> #include <linux/thread_info.h>
> #include <linux/stackleak.h>
> +#include <linux/scs.h>

Nit: alphabetical order, please (this should come before stackleak.h).

>
> #include <asm/pgtable.h>
> #include <asm/pgalloc.h>
> @@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)
>
> void free_task(struct task_struct *tsk)
> {
> + scs_release(tsk);
> +
> #ifndef CONFIG_THREAD_INFO_IN_TASK
> /*
> * The task is finally done with both the stack and thread_info,
> @@ -834,6 +837,8 @@ void __init fork_init(void)
> NULL, free_vm_stack_cache);
> #endif
>
> + scs_init();
> +
> lockdep_init_task(&init_task);
> uprobes_init();
> }
> @@ -907,6 +912,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
> clear_user_return_notifier(tsk);
> clear_tsk_need_resched(tsk);
> set_task_stack_end_magic(tsk);
> + scs_task_init(tsk);
>
> #ifdef CONFIG_STACKPROTECTOR
> tsk->stack_canary = get_random_canary();
> @@ -2022,6 +2028,9 @@ static __latent_entropy struct task_struct *copy_process(
> args->tls);
> if (retval)
> goto bad_fork_cleanup_io;
> + retval = scs_prepare(p, node);
> + if (retval)
> + goto bad_fork_cleanup_thread;

Can we please fold scs_prepare() into scs_task_init() and do this in
dup_task_struct()? That way we set this up consistently in one place,
where we're also allocating the regular stack.

Arguably stackleak_task_init() would better fit there too.

>
> stackleak_task_init(p);
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index dd05a378631a..e7faeb383008 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6013,6 +6013,8 @@ void init_idle(struct task_struct *idle, int cpu)
> raw_spin_lock_irqsave(&idle->pi_lock, flags);
> raw_spin_lock(&rq->lock);
>
> + scs_task_reset(idle);

I'm a bit confused by this -- please see comments below on
scs_task_reset().

> +
> __sched_fork(0, idle);
> idle->state = TASK_RUNNING;
> idle->se.exec_start = sched_clock();
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 0db2c1b3361e..c153003a011c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -58,6 +58,7 @@
> #include <linux/profile.h>
> #include <linux/psi.h>
> #include <linux/rcupdate_wait.h>
> +#include <linux/scs.h>
> #include <linux/security.h>
> #include <linux/stop_machine.h>
> #include <linux/suspend.h>
> diff --git a/kernel/scs.c b/kernel/scs.c
> new file mode 100644
> index 000000000000..383d29e8c199
> --- /dev/null
> +++ b/kernel/scs.c
> @@ -0,0 +1,155 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#include <linux/cpuhotplug.h>
> +#include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/scs.h>

Nit: alphabetical order, please.

> +#include <linux/vmalloc.h>
> +#include <asm/scs.h>
> +
> +static inline void *__scs_base(struct task_struct *tsk)
> +{
> + return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
> +}

We only ever assign the base to task_scs(tsk), with the current live
value being in a register that we don't read. Are we expecting arch code
to keep this up-to-date with the register value?

I would have expected that we just leave this as the base (as we do for
the regular stack in the task struct), and it's down to arch code to
save/restore the current value where necessary.

Am I missing some caveat with that approach?

> +
> +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
> +
> +/* Keep a cache of shadow stacks */
> +#define SCS_CACHE_SIZE 2
> +static DEFINE_PER_CPU(void *, scs_cache[SCS_CACHE_SIZE]);
> +
> +static void *scs_alloc(int node)
> +{
> + int i;
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + void *s;
> +
> + s = this_cpu_xchg(scs_cache[i], NULL);
> + if (s) {
> + memset(s, 0, SCS_SIZE);
> + return s;
> + }
> + }
> +
> + BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);

It's probably worth a comment on why we rely on SCS_SIZE <= PAGE_SIZE.

> +
> + return __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> + VMALLOC_START, VMALLOC_END,
> + GFP_SCS, PAGE_KERNEL, 0,
> + node, __builtin_return_address(0));
> +}
> +
> +static void scs_free(void *s)
> +{
> + int i;
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + if (this_cpu_cmpxchg(scs_cache[i], 0, s) != 0)
> + continue;
> +
> + return;
> + }
> +
> + vfree_atomic(s);
> +}
> +
> +static int scs_cleanup(unsigned int cpu)
> +{
> + int i;
> + void **cache = per_cpu_ptr(scs_cache, cpu);
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + vfree(cache[i]);
> + cache[i] = NULL;
> + }
> +
> + return 0;
> +}
> +
> +void __init scs_init(void)
> +{
> + cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
> + scs_cleanup);
> +}
> +
> +#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static struct kmem_cache *scs_cache;
> +
> +static inline void *scs_alloc(int node)
> +{
> + return kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
> +}
> +
> +static inline void scs_free(void *s)
> +{
> + kmem_cache_free(scs_cache, s);
> +}
> +
> +void __init scs_init(void)
> +{
> + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
> + 0, NULL);
> + WARN_ON(!scs_cache);
> +}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static inline unsigned long *scs_magic(struct task_struct *tsk)
> +{
> + return (unsigned long *)(__scs_base(tsk) + SCS_SIZE - sizeof(long));

Slightly simpler as:

return (unsigned long *)(__scs_base(tsk) + SCS_SIZE) - 1;

Thanks,
Mark.

2019-10-25 20:44:31

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Thu, Oct 24, 2019 at 3:51 PM <[email protected]> wrote:
>
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> Note that security guarantees in the kernel differ from the
> ones documented for user space. The kernel must store addresses
> of shadow stacks used by other tasks and interrupt handlers in
> memory, which means an attacker capable reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> Makefile | 6 ++
> arch/Kconfig | 33 +++++++
> include/linux/compiler-clang.h | 6 ++
> include/linux/compiler_types.h | 4 +
> include/linux/scs.h | 78 +++++++++++++++++
> init/init_task.c | 8 ++
> kernel/Makefile | 1 +
> kernel/fork.c | 9 ++
> kernel/sched/core.c | 2 +
> kernel/sched/sched.h | 1 +
> kernel/scs.c | 155 +++++++++++++++++++++++++++++++++
> 11 files changed, 303 insertions(+)
> create mode 100644 include/linux/scs.h
> create mode 100644 kernel/scs.c
>
> diff --git a/Makefile b/Makefile
> index 5475cdb6d57d..2b5c59fb18f2 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
> KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
> endif
>
> +ifdef CONFIG_SHADOW_CALL_STACK
> +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> +export CC_FLAGS_SCS
> +endif
> +
> # arch Makefile may override CC so keep this after arch Makefile is included
> NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 5f8a5d84dbbe..5e34cbcd8d6a 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
> about 20% of all kernel functions, which increases the kernel code
> size by about 2%.
>
> +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> + bool
> + help
> + An architecture should select this if it supports Clang's Shadow
> + Call Stack, has asm/scs.h, and implements runtime support for shadow
> + stack switching.
> +
> +config SHADOW_CALL_STACK_VMAP
> + bool
> + depends on SHADOW_CALL_STACK
> + help
> + Use virtually mapped shadow call stacks. Selecting this option
> + provides better stack exhaustion protection, but increases per-thread
> + memory consumption as a full page is allocated for each shadow stack.
> +
> +config SHADOW_CALL_STACK
> + bool "Clang Shadow Call Stack"
> + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> + help
> + This option enables Clang's Shadow Call Stack, which uses a
> + shadow stack to protect function return addresses from being
> + overwritten by an attacker. More information can be found from
> + Clang's documentation:
> +
> + https://clang.llvm.org/docs/ShadowCallStack.html
> +
> + Note that security guarantees in the kernel differ from the ones
> + documented for user space. The kernel must store addresses of shadow
> + stacks used by other tasks and interrupt handlers in memory, which
> + means an attacker capable reading and writing arbitrary memory may
> + be able to locate them and hijack control flow by modifying shadow
> + stacks that are not currently in use.
> +
> config HAVE_ARCH_WITHIN_STACK_FRAMES
> bool
> help
> diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
> index 333a6695a918..afe5e24088b2 100644
> --- a/include/linux/compiler-clang.h
> +++ b/include/linux/compiler-clang.h
> @@ -42,3 +42,9 @@
> * compilers, like ICC.
> */
> #define barrier() __asm__ __volatile__("" : : : "memory")
> +
> +#if __has_feature(shadow_call_stack)
> +# define __noscs __attribute__((no_sanitize("shadow-call-stack")))
> +#else
> +# define __noscs
> +#endif
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 72393a8c1a6c..be5d5be4b1ae 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -202,6 +202,10 @@ struct ftrace_likely_data {
> # define randomized_struct_fields_end
> #endif
>
> +#ifndef __noscs
> +# define __noscs
> +#endif
> +
> #ifndef asm_volatile_goto
> #define asm_volatile_goto(x...) asm goto(x)
> #endif
> diff --git a/include/linux/scs.h b/include/linux/scs.h
> new file mode 100644
> index 000000000000..c8b0ccfdd803
> --- /dev/null
> +++ b/include/linux/scs.h
> @@ -0,0 +1,78 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2018 Google LLC
> + */
> +
> +#ifndef _LINUX_SCS_H
> +#define _LINUX_SCS_H
> +
> +#include <linux/gfp.h>
> +#include <linux/sched.h>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +
> +#define SCS_SIZE 1024
> +#define SCS_END_MAGIC 0xaf0194819b1635f6UL
> +
> +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
> +
> +static inline void *task_scs(struct task_struct *tsk)
> +{
> + return task_thread_info(tsk)->shadow_call_stack;
> +}
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s)
> +{
> + task_thread_info(tsk)->shadow_call_stack = s;
> +}
> +
> +extern void scs_init(void);
> +extern void scs_task_init(struct task_struct *tsk);
> +extern void scs_task_reset(struct task_struct *tsk);
> +extern int scs_prepare(struct task_struct *tsk, int node);
> +extern bool scs_corrupted(struct task_struct *tsk);
> +extern void scs_release(struct task_struct *tsk);
> +
> +#else /* CONFIG_SHADOW_CALL_STACK */
> +
> +static inline void *task_scs(struct task_struct *tsk)
> +{
> + return 0;
> +}
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s)
> +{
> +}
> +
> +static inline void scs_init(void)
> +{
> +}
> +
> +static inline void scs_task_init(struct task_struct *tsk)
> +{
> +}
> +
> +static inline void scs_task_reset(struct task_struct *tsk)
> +{
> +}
> +
> +static inline int scs_prepare(struct task_struct *tsk, int node)
> +{
> + return 0;
> +}
> +
> +static inline bool scs_corrupted(struct task_struct *tsk)
> +{
> + return false;
> +}
> +
> +static inline void scs_release(struct task_struct *tsk)
> +{
> +}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK */
> +
> +#endif /* _LINUX_SCS_H */
> diff --git a/init/init_task.c b/init/init_task.c
> index 9e5cbe5eab7b..cbd40460e903 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -11,6 +11,7 @@
> #include <linux/mm.h>
> #include <linux/audit.h>
> #include <linux/numa.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <linux/uaccess.h>
> @@ -184,6 +185,13 @@ struct task_struct init_task
> };
> EXPORT_SYMBOL(init_task);
>
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
> + __aligned(SCS_SIZE) = {
> + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
> +};
> +#endif
> +
> /*
> * Initial thread structure. Alignment of this is handled by a special
> * linker map entry.
> diff --git a/kernel/Makefile b/kernel/Makefile
> index daad787fb795..313dbd44d576 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
> obj-$(CONFIG_IRQ_WORK) += irq_work.o
> obj-$(CONFIG_CPU_PM) += cpu_pm.o
> obj-$(CONFIG_BPF) += bpf/
> +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
>
> obj-$(CONFIG_PERF_EVENTS) += events/
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index bcdf53125210..ae7ebe9f0586 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -94,6 +94,7 @@
> #include <linux/livepatch.h>
> #include <linux/thread_info.h>
> #include <linux/stackleak.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <asm/pgalloc.h>
> @@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)
>
> void free_task(struct task_struct *tsk)
> {
> + scs_release(tsk);
> +
> #ifndef CONFIG_THREAD_INFO_IN_TASK
> /*
> * The task is finally done with both the stack and thread_info,
> @@ -834,6 +837,8 @@ void __init fork_init(void)
> NULL, free_vm_stack_cache);
> #endif
>
> + scs_init();
> +
> lockdep_init_task(&init_task);
> uprobes_init();
> }
> @@ -907,6 +912,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
> clear_user_return_notifier(tsk);
> clear_tsk_need_resched(tsk);
> set_task_stack_end_magic(tsk);
> + scs_task_init(tsk);
>
> #ifdef CONFIG_STACKPROTECTOR
> tsk->stack_canary = get_random_canary();
> @@ -2022,6 +2028,9 @@ static __latent_entropy struct task_struct *copy_process(
> args->tls);
> if (retval)
> goto bad_fork_cleanup_io;
> + retval = scs_prepare(p, node);
> + if (retval)
> + goto bad_fork_cleanup_thread;
>
> stackleak_task_init(p);
>
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index dd05a378631a..e7faeb383008 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6013,6 +6013,8 @@ void init_idle(struct task_struct *idle, int cpu)
> raw_spin_lock_irqsave(&idle->pi_lock, flags);
> raw_spin_lock(&rq->lock);
>
> + scs_task_reset(idle);
> +
> __sched_fork(0, idle);
> idle->state = TASK_RUNNING;
> idle->se.exec_start = sched_clock();
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 0db2c1b3361e..c153003a011c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -58,6 +58,7 @@
> #include <linux/profile.h>
> #include <linux/psi.h>
> #include <linux/rcupdate_wait.h>
> +#include <linux/scs.h>
> #include <linux/security.h>
> #include <linux/stop_machine.h>
> #include <linux/suspend.h>
> diff --git a/kernel/scs.c b/kernel/scs.c
> new file mode 100644
> index 000000000000..383d29e8c199
> --- /dev/null
> +++ b/kernel/scs.c
> @@ -0,0 +1,155 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#include <linux/cpuhotplug.h>
> +#include <linux/mm.h>
> +#include <linux/slab.h>
> +#include <linux/scs.h>
> +#include <linux/vmalloc.h>
> +#include <asm/scs.h>
> +
> +static inline void *__scs_base(struct task_struct *tsk)
> +{
> + return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
> +}
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
> +
> +/* Keep a cache of shadow stacks */
> +#define SCS_CACHE_SIZE 2
> +static DEFINE_PER_CPU(void *, scs_cache[SCS_CACHE_SIZE]);
> +
> +static void *scs_alloc(int node)
> +{
> + int i;
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + void *s;
> +
> + s = this_cpu_xchg(scs_cache[i], NULL);
> + if (s) {
> + memset(s, 0, SCS_SIZE);
> + return s;
> + }
> + }
> +
> + BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
> +
> + return __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> + VMALLOC_START, VMALLOC_END,
> + GFP_SCS, PAGE_KERNEL, 0,
> + node, __builtin_return_address(0));
> +}
> +
> +static void scs_free(void *s)
> +{
> + int i;
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + if (this_cpu_cmpxchg(scs_cache[i], 0, s) != 0)
> + continue;
> +
> + return;
> + }

prefer:

for ...:
if foo() == 0:
return

to:

for ...:
if foo() != 0:
continue
return

> +
> + vfree_atomic(s);
> +}
> +
> +static int scs_cleanup(unsigned int cpu)
> +{
> + int i;
> + void **cache = per_cpu_ptr(scs_cache, cpu);
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + vfree(cache[i]);
> + cache[i] = NULL;
> + }
> +
> + return 0;
> +}
> +
> +void __init scs_init(void)
> +{
> + cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
> + scs_cleanup);
> +}
> +
> +#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static struct kmem_cache *scs_cache;
> +
> +static inline void *scs_alloc(int node)
> +{
> + return kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
> +}
> +
> +static inline void scs_free(void *s)
> +{
> + kmem_cache_free(scs_cache, s);
> +}
> +
> +void __init scs_init(void)
> +{
> + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
> + 0, NULL);
> + WARN_ON(!scs_cache);
> +}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static inline unsigned long *scs_magic(struct task_struct *tsk)
> +{
> + return (unsigned long *)(__scs_base(tsk) + SCS_SIZE - sizeof(long));
> +}
> +
> +static inline void scs_set_magic(struct task_struct *tsk)
> +{
> + *scs_magic(tsk) = SCS_END_MAGIC;
> +}
> +
> +void scs_task_init(struct task_struct *tsk)
> +{
> + task_set_scs(tsk, NULL);
> +}
> +
> +void scs_task_reset(struct task_struct *tsk)
> +{
> + task_set_scs(tsk, __scs_base(tsk));
> +}
> +
> +int scs_prepare(struct task_struct *tsk, int node)
> +{
> + void *s;
> +
> + s = scs_alloc(node);
> + if (!s)
> + return -ENOMEM;
> +
> + task_set_scs(tsk, s);
> + scs_set_magic(tsk);
> +
> + return 0;
> +}
> +
> +bool scs_corrupted(struct task_struct *tsk)
> +{
> + return *scs_magic(tsk) != SCS_END_MAGIC;
> +}
> +
> +void scs_release(struct task_struct *tsk)
> +{
> + void *s;
> +
> + s = __scs_base(tsk);
> + if (!s)
> + return;
> +
> + WARN_ON(scs_corrupted(tsk));
> +
> + scs_task_init(tsk);
> + scs_free(s);
> +}
> --
> 2.24.0.rc0.303.g954a862665-goog
>


--
Thanks,
~Nick Desaulniers

2019-10-25 20:57:02

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 16/17] arm64: disable SCS for hypervisor code

On Thu, Oct 24, 2019 at 6:31 PM Masahiro Yamada
<[email protected]> wrote:
> $(subst ... ) is not the correct use here.
>
> It works like sed, s/$(CC_CFLAGS_SCS)//
> instead of matching by word.
>
>
>
>
> KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
>
> is more correct, and simpler.

Thanks, I will change this in v3.

Sami

2019-10-25 20:58:32

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Fri, Oct 25, 2019 at 9:22 AM Nick Desaulniers
<[email protected]> wrote:
> > +static void scs_free(void *s)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> > + if (this_cpu_cmpxchg(scs_cache[i], 0, s) != 0)
> > + continue;
> > +
> > + return;
> > + }
>
> prefer:
>
> for ...:
> if foo() == 0:
> return
>
> to:
>
> for ...:
> if foo() != 0:
> continue
> return

This was essentially copied from free_thread_stack in kernel/fork.c,
but I agree, your way is cleaner. I'll change this in the next
version. Thanks!
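
Roughly, the reworked loop would then read as follows (a sketch based on
the scs_free() above, not the final patch):

static void scs_free(void *s)
{
	int i;

	/* Try to park the stack in a free per-cpu cache slot first. */
	for (i = 0; i < SCS_CACHE_SIZE; i++) {
		if (this_cpu_cmpxchg(scs_cache[i], 0, s) == 0)
			return;
	}

	/* All cache slots were taken; free the allocation for real. */
	vfree_atomic(s);
}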

Sami

2019-10-25 21:00:28

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Fri, Oct 25, 2019 at 3:56 AM Mark Rutland <[email protected]> wrote:
> > +#define SCS_SIZE 1024
>
> I think it'd be worth a comment on how this size was chosen. IIRC this
> empirical?

Correct. I'll add a comment.

> > +#define SCS_END_MAGIC 0xaf0194819b1635f6UL
>
> Keyboard smash? ... or is there a prize for whoever figures out the
> secret? ;)

It's a random number, so if someone figures out a secret in it,
they'll definitely deserve a prize. :)

> > +static inline void task_set_scs(struct task_struct *tsk, void *s)
> > +{
> > + task_thread_info(tsk)->shadow_call_stack = s;
> > +}
>
> This should probably be named get and set, or have:
>
> #define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
>
> ... which can have a trivial implementation as NULL for the !SCS case.

Sure, sounds good.

> For all the trivial wrappers you can put the implementation on the same
> line as the prototype. That makes it a bit easier to compare against the
> prototypes on the other side of the ifdeffery.
>
> e.g. this lot can be:
>
> static inline void *task_scs(struct task_struct *tsk) { return 0; }
> static inline void task_set_scs(struct task_struct *tsk, void *s) { }
> static inline void scs_init(void) { }

Agreed.

> > diff --git a/kernel/fork.c b/kernel/fork.c
> > index bcdf53125210..ae7ebe9f0586 100644
> > --- a/kernel/fork.c
> > +++ b/kernel/fork.c
> > @@ -94,6 +94,7 @@
> > #include <linux/livepatch.h>
> > #include <linux/thread_info.h>
> > #include <linux/stackleak.h>
> > +#include <linux/scs.h>
>
> Nit: alphabetical order, please (this should come before stackleak.h).

The includes in kernel/fork.c aren't in alphabetical order, so I just
added this to the end here.

> > + retval = scs_prepare(p, node);
> > + if (retval)
> > + goto bad_fork_cleanup_thread;
>
> Can we please fold scs_prepare() into scs_task_init() and do this in
> dup_task_struct()? That way we set this up consistently in one place,
> where we're also allocating the regular stack.

Yes, that does sound cleaner. I'll change that.

> > + scs_task_reset(idle);
>
> I'm a bit confused by this -- please see comments below on
> scs_task_reset().

> > +static inline void *__scs_base(struct task_struct *tsk)
> > +{
> > + return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
> > +}
>
> We only ever assign the base to task_scs(tsk), with the current live
> value being in a register that we don't read. Are we expecting arch code
> to keep this up-to-date with the register value?
>
> I would have expected that we just leave this as the base (as we do for
> the regular stack in the task struct), and it's down to arch code to
> save/restore the current value where necessary.
>
> Am I missing some caveat with that approach?

To keep the address of the currently active shadow stack out of
memory, the arm64 implementation clears this field when it loads x18
and saves the current value before a context switch. The generic code
doesn't expect the arch code to necessarily do so, but does allow it.
This requires us to use __scs_base() when accessing the base pointer
and to reset it in idle tasks before they're reused, hence
scs_task_reset().
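
As a rough standalone illustration of why the masking in __scs_base()
works no matter which value is stored in the field (the addresses below
are made up, and this is plain user-space C rather than kernel code):

#include <stdio.h>

#define SCS_SIZE 1024UL

int main(void)
{
	/* Hypothetical SCS_SIZE-aligned shadow stack allocation. */
	unsigned long base  = 0x4000;
	/* Pointer somewhere inside it, e.g. saved at context switch. */
	unsigned long saved = base + 0x1d8;

	/* Masking the saved pointer recovers the allocation base... */
	unsigned long from_saved = saved & ~(SCS_SIZE - 1);
	/* ...and a cleared (NULL) field simply stays NULL. */
	unsigned long from_null  = 0UL & ~(SCS_SIZE - 1);

	printf("recovered %#lx (expect %#lx), cleared field stays %#lx\n",
	       from_saved, base, from_null);
	return 0;
}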

> > + BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
>
> It's probably worth a comment on why we rely on SCS_SIZE <= PAGE_SIZE.

Ack.

> > +static inline unsigned long *scs_magic(struct task_struct *tsk)
> > +{
> > + return (unsigned long *)(__scs_base(tsk) + SCS_SIZE - sizeof(long));
>
> Slightly simpler as:
>
> return (unsigned long *)(__scs_base(tsk) + SCS_SIZE) - 1;

Yes, that's a bit cleaner.

I'll fix these in v3. Thank you for the review!

Sami

2019-10-25 21:41:58

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 02/17] arm64/lib: copy_page: avoid x18 register in assembler code

On Fri, Oct 25, 2019 at 2:41 AM Mark Rutland <[email protected]> wrote:
> > diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
> > index bbb8562396af..8b562264c165 100644
> > --- a/arch/arm64/lib/copy_page.S
> > +++ b/arch/arm64/lib/copy_page.S
> > @@ -34,45 +34,45 @@ alternative_else_nop_endif
> > ldp x14, x15, [x1, #96]
> > ldp x16, x17, [x1, #112]
> >
> > - mov x18, #(PAGE_SIZE - 128)
> > + add x0, x0, #256
> > add x1, x1, #128
> > 1:
> > - subs x18, x18, #128
> > + tst x0, #(PAGE_SIZE - 1)
> >
> > alternative_if ARM64_HAS_NO_HW_PREFETCH
> > prfm pldl1strm, [x1, #384]
> > alternative_else_nop_endif
> >
> > - stnp x2, x3, [x0]
> > + stnp x2, x3, [x0, #-256]
> > ldp x2, x3, [x1]
> > - stnp x4, x5, [x0, #16]
> > + stnp x4, x5, [x0, #-240]
> > ldp x4, x5, [x1, #16]
>
> For legibility, could we make the offset and bias explicit in the STNPs
> so that these line up? e.g.
>
> stnp x4, x5, [x0, #16 - 256]
> ldp x4, x5, [x1, #16]
>
> ... that'd make it much easier to see by eye that this is sound, much as
> I trust my mental arithmetic. ;)

Sure, that makes sense. I'll change this in v3.

Sami

2019-10-25 22:12:55

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v2 11/17] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2c0238ce0551..ef76101201b2 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -72,6 +72,10 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.24.0.rc0.303.g954a862665-goog

2019-10-25 22:29:11

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 04/17] arm64: kernel: avoid x18 as an arbitrary temp register

Minor nit, but could we make the title a bit more specific (and more
uniform across the series)? e.g.

arm64: kvm: avoid x18 in __cpu_soft_restart

That makes things a bit nicer when trawling through git logs as the
scope of the patch is clearer.

On Thu, Oct 24, 2019 at 03:51:19PM -0700, [email protected] wrote:
> From: Ard Biesheuvel <[email protected]>
>
> The code in __cpu_soft_restart() uses x18 as an arbitrary temp register,
> which will shortly be disallowed. So use x8 instead.
>
> Link: https://patchwork.kernel.org/patch/9836877/
> Signed-off-by: Ard Biesheuvel <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>

Either way:

Reviewed-by: Mark Rutland <[email protected]>

> ---
> arch/arm64/kernel/cpu-reset.S | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
> index 6ea337d464c4..32c7bf858dd9 100644
> --- a/arch/arm64/kernel/cpu-reset.S
> +++ b/arch/arm64/kernel/cpu-reset.S
> @@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart)
> mov x0, #HVC_SOFT_RESTART
> hvc #0 // no return
>
> -1: mov x18, x1 // entry
> +1: mov x8, x1 // entry
> mov x0, x2 // arg0
> mov x1, x3 // arg1
> mov x2, x4 // arg2
> - br x18
> + br x8
> ENDPROC(__cpu_soft_restart)
>
> .popsection
> --
> 2.24.0.rc0.303.g954a862665-goog
>

2019-10-25 22:31:40

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 09/17] arm64: disable function graph tracing with SCS

On Thu, Oct 24, 2019 at 03:51:24PM -0700, [email protected] wrote:
> With CONFIG_FUNCTION_GRAPH_TRACER, function return addresses are
> modified in ftrace_graph_caller and prepare_ftrace_return to redirect
> control flow to ftrace_return_to_handler. This is incompatible with
> SCS.

I'm guessing it's difficult to always figure out the SCS slot for an
instrumented callsite unless we pass this explicitly from the ftrace
entry code, so we'd probably have to change some common infrastructure
for that.

We have a similar issue with pointer authentication, and we're solving
that with -fpatchable-function-entry, which allows us to hook the
callsite before it does anything with the return address. IIUC we could
use the same mechanism here (and avoid introducing a third).
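
(For illustration only, not something in this series: with GCC the
-fpatchable-function-entry=N,M flag, or the matching function attribute,
reserves NOP padding at function entry that can later be patched into a
branch, so a hook runs before the prologue touches the return address.
The counts below are arbitrary.)

/* Two patchable NOPs at the entry point, none before the symbol. */
__attribute__((patchable_function_entry(2, 0)))
void traced_function(void)
{
	/* regular function body */
}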

Are there plans to implement -fpatchable-function-entry on the clang
side?

Thanks,
Mark.

>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 3f047afb982c..8cda176dad9a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -148,7 +148,7 @@ config ARM64
> select HAVE_FTRACE_MCOUNT_RECORD
> select HAVE_FUNCTION_TRACER
> select HAVE_FUNCTION_ERROR_INJECTION
> - select HAVE_FUNCTION_GRAPH_TRACER
> + select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
> select HAVE_GCC_PLUGINS
> select HAVE_HW_BREAKPOINT if PERF_EVENTS
> select HAVE_IRQ_TIME_ACCOUNTING
> --
> 2.24.0.rc0.303.g954a862665-goog
>

2019-10-26 16:01:38

by Joe Perches

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Thu, 2019-10-24 at 15:51 -0700, [email protected] wrote:
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
[]
> diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
[]
> @@ -42,3 +42,9 @@
> * compilers, like ICC.
> */
> #define barrier() __asm__ __volatile__("" : : : "memory")
> +
> +#if __has_feature(shadow_call_stack)
> +# define __noscs __attribute__((no_sanitize("shadow-call-stack")))

__no_sanitize__


2019-10-28 21:11:16

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

Hi Joe,

On Sat, Oct 26, 2019 at 8:57 AM Joe Perches <[email protected]> wrote:
> > +#if __has_feature(shadow_call_stack)
> > +# define __noscs __attribute__((no_sanitize("shadow-call-stack")))
>
> __no_sanitize__

Sorry, I missed your earlier message about this. I'm following Clang's
documentation for the attribute:

https://clang.llvm.org/docs/ShadowCallStack.html#attribute-no-sanitize-shadow-call-stack

Although __no_sanitize__ seems to work too. Is there a particular
reason to prefer that form over the one in the documentation?

Sami

2019-10-28 21:16:23

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Mon, Oct 28, 2019 at 8:31 AM Miguel Ojeda
<[email protected]> wrote:
> We decided to do it like that when I introduced compiler_attributes.h.
>
> Given it is hidden behind a definition, we don't care about which one we use internally; therefore the idea was to avoid clashes as much as possible with other names/definitions/etc.
>
> The syntax is supported in the compilers we care about (for docs on attributes, the best reference is GCC's by the way).

Got it, thank you for explaining. I'll change this to __no_sanitize__
in v3 since Clang seems to be happy with either version.

Sami

2019-10-28 21:21:36

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Fri, Oct 25, 2019 at 01:49:21PM -0700, Sami Tolvanen wrote:
> On Fri, Oct 25, 2019 at 3:56 AM Mark Rutland <[email protected]> wrote:
> > > +#define SCS_END_MAGIC 0xaf0194819b1635f6UL
> >
> > Keyboard smash? ... or is there a prize for whoever figures out the
> > secret? ;)
>
> It's a random number, so if someone figures out a secret in it,
> they'll definitely deserve a prize. :)

I'll Cc some treasure hunters. :)

> > > diff --git a/kernel/fork.c b/kernel/fork.c
> > > index bcdf53125210..ae7ebe9f0586 100644
> > > --- a/kernel/fork.c
> > > +++ b/kernel/fork.c
> > > @@ -94,6 +94,7 @@
> > > #include <linux/livepatch.h>
> > > #include <linux/thread_info.h>
> > > #include <linux/stackleak.h>
> > > +#include <linux/scs.h>
> >
> > Nit: alphabetical order, please (this should come before stackleak.h).
>
> The includes in kernel/fork.c aren't in alphabetical order, so I just
> added this to the end here.

Fair enough. It looked otherwise in the context, and we generally aim
for that as a soft rule.

[...]

> > > +static inline void *__scs_base(struct task_struct *tsk)
> > > +{
> > > + return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
> > > +}
> >
> > We only ever assign the base to task_scs(tsk), with the current live
> > value being in a register that we don't read. Are we expecting arch code
> > to keep this up-to-date with the register value?
> >
> > I would have expected that we just leave this as the base (as we do for
> > the regular stack in the task struct), and it's down to arch code to
> > save/restore the current value where necessary.
> >
> > Am I missing some caveat with that approach?
>
> To keep the address of the currently active shadow stack out of
> memory, the arm64 implementation clears this field when it loads x18
> and saves the current value before a context switch. The generic code
> doesn't expect the arch code to necessarily do so, but does allow it.
> This requires us to use __scs_base() when accessing the base pointer
> and to reset it in idle tasks before they're reused, hence
> scs_task_reset().

Ok. That'd be worth a comment somewhere, since it adds a number of
things which would otherwise be unnecessary.

IIUC this assumes an adversary who knows the address of a task's
thread_info, and has an arbitrary-read (to extract the SCS base from
thread_info) and an arbitrary-write (to modify the SCS area).

Assuming that's the case, I don't think this buys much. If said
adversary controls two userspace threads A and B, they only need to wait
until A is context-switched out or in userspace, and read A's SCS base
using B.

Given that, I'd rather always store the SCS base in the thread_info, and
simplify the rest of the code manipulating it.

Thanks,
Mark.

2019-10-28 22:08:34

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Mon, Oct 28, 2019 at 04:35:33PM +0000, Mark Rutland wrote:
> On Fri, Oct 25, 2019 at 01:49:21PM -0700, Sami Tolvanen wrote:
> > To keep the address of the currently active shadow stack out of
> > memory, the arm64 implementation clears this field when it loads x18
> > and saves the current value before a context switch. The generic code
> > doesn't expect the arch code to necessarily do so, but does allow it.
> > This requires us to use __scs_base() when accessing the base pointer
> > and to reset it in idle tasks before they're reused, hence
> > scs_task_reset().
>
> Ok. That'd be worth a comment somewhere, since it adds a number of
> things which would otherwise be unnecessary.
>
> IIUC this assumes an adversary who knows the address of a task's
> thread_info, and has an arbitrary-read (to extract the SCS base from
> thead_info) and an arbitrary-write (to modify the SCS area).
>
> Assuming that's the case, I don't think this buys much. If said
> adversary controls two userspace threads A and B, they only need to wait
> until A is context-switched out or in userspace, and read A's SCS base
> using B.
>
> Given that, I'd rather always store the SCS base in the thread_info, and
> simplify the rest of the code manipulating it.

I'd like to keep this as-is since it provides a temporal protection.
Having arbitrary kernel read and write at arbitrary time is a very
powerful attack primitive, and is, IMO, not very common. Many attacks
tend to be chains of bugs that give attackers narrow visibility into the
kernel at specific moments. I would say this design is more about stopping
"current" from dumping thread_info (as there are many more opportunities
for current to see its own thread_info compared to arbitrary addresses
or another task's thread_info). As such, I think it's a reasonable
precaution to take.

--
Kees Cook

2019-10-29 19:53:23

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 09/17] arm64: disable function graph tracing with SCS

On Fri, Oct 25, 2019 at 4:03 AM Mark Rutland <[email protected]> wrote:
> I'm guessing it's difficult to always figure out the SCS slot for an
> instrumented callsite unless we pass this explicitly from the ftrace
> entry code, so we'd probably have to change some common infrastructure
> for that.
>
> We have a similar issue with pointer authentication, and we're solving
> that with -fpatchable-function-entry, which allows us to hook the
> callsite before it does anything with the return address. IIUC we could
> use the same mechanism here (and avoid introducing a third).
>
> Are there plans to implement -fpatchable-function-entry on the clang
> side?

I'm not sure if there are plans at the moment, but if this feature is
needed for PAC, adding it to clang shouldn't be a problem. Nick, did
you have any thoughts on this?

Sami

2019-10-29 19:59:36

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v2 05/17] add support for Clang's Shadow Call Stack (SCS)

On Mon, Oct 28, 2019 at 12:57 PM Kees Cook <[email protected]> wrote:
> On Mon, Oct 28, 2019 at 04:35:33PM +0000, Mark Rutland wrote:
> > On Fri, Oct 25, 2019 at 01:49:21PM -0700, Sami Tolvanen wrote:
> > > To keep the address of the currently active shadow stack out of
> > > memory, the arm64 implementation clears this field when it loads x18
> > > and saves the current value before a context switch. The generic code
> > > doesn't expect the arch code to necessarily do so, but does allow it.
> > > This requires us to use __scs_base() when accessing the base pointer
> > > and to reset it in idle tasks before they're reused, hence
> > > scs_task_reset().
> >
> > Ok. That'd be worth a comment somewhere, since it adds a number of
> > things which would otherwise be unnecessary.
> >
> > IIUC this assumes an adversary who knows the address of a task's
> > thread_info, and has an arbitrary-read (to extract the SCS base from
> > thead_info) and an arbitrary-write (to modify the SCS area).
> >
> > Assuming that's the case, I don't think this buys much. If said
> > adversary controls two userspace threads A and B, they only need to wait
> > until A is context-switched out or in userspace, and read A's SCS base
> > using B.
> >
> > Given that, I'd rather always store the SCS base in the thread_info, and
> > simplify the rest of the code manipulating it.
>
> I'd like to keep this as-is since it provides a temporal protection.
> Having arbitrary kernel read and write at arbitrary time is a very
> powerful attack primitive, and is, IMO, not very common. Many attacks
> tend to be chains of bugs that give attackers narrow visibility in to the
> kernel at specific moments. I would say this design is more about stopping
> "current" from dumping thread_info (as there are many more opportunities
> for current to see its own thread_info compared to arbitrary addresses
> or another task's thread_info). As such, I think it's a reasonable
> precaution to take.

I'm not sure if always storing the base address in thread_info would
simplify the code that much. We could remove __scs_base() and
scs_task_reset(), which are both trivial, and drop a few instructions
in the arch-specific code that clear the field. I do agree that a
comment or two would help understand what's going on here though.

Sami

2019-10-29 20:37:54

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v2 09/17] arm64: disable function graph tracing with SCS

On Tue, Oct 29, 2019 at 10:45 AM Sami Tolvanen <[email protected]> wrote:
>
> On Fri, Oct 25, 2019 at 4:03 AM Mark Rutland <[email protected]> wrote:
> > We have a similar issue with pointer authentication, and we're solving
> > that with -fpatchable-function-entry, which allows us to hook the
> > callsite before it does anything with the return address. IIUC we could
> > use the same mechanism here (and avoid introducing a third).
> >
> > Are there plans to implement -fpatchable-function-entry on the clang
> > side?
>
> I'm not sure if there are plans at the moment, but if this feature is
> needed for PAC, adding it to clang shouldn't be a problem. Nick, did
> you have any thoughts on this?

I didn't see anything explicitly in LLVM's issue tracker. I also
didn't see -fpatchable-function-entry currently in -next other than
under arch/parisc. Are there patches I can look at?

Has ARM's kernel team expressed the need to ARM's LLVM team?
--
Thanks,
~Nick Desaulniers

2019-10-31 16:50:03

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v3 00/17] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer. Because of this, the series includes
patches from Ard to remove x18 usage from assembly code.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
to alter control flow, such as function graph tracing and
kretprobes, although it may be possible to later change these
features to modify the shadow stack instead. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Ard Biesheuvel (1):
arm64: kernel: avoid x18 in __cpu_soft_restart

Sami Tolvanen (16):
arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings
arm64/lib: copy_page: avoid x18 register in assembler code
arm64: kvm: stop treating register x18 as caller save
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
kprobes: fix compilation without CONFIG_KRETPROBES
arm64: kprobes: fix kprobes without CONFIG_KRETPROBES
arm64: disable kretprobes with SCS
arm64: disable function graph tracing with SCS
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack

Makefile | 6 +
arch/Kconfig | 33 ++++
arch/arm64/Kconfig | 9 +-
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/scs.h | 37 +++++
arch/arm64/include/asm/stacktrace.h | 4 +
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/cpu-reset.S | 4 +-
arch/arm64/kernel/efi-rt-wrapper.S | 7 +-
arch/arm64/kernel/entry.S | 28 ++++
arch/arm64/kernel/head.S | 9 ++
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/probes/kprobes.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 39 +++++
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/kvm/hyp/Makefile | 3 +
arch/arm64/kvm/hyp/entry.S | 41 +++--
arch/arm64/lib/copy_page.S | 38 ++---
arch/arm64/mm/proc.S | 72 +++++----
drivers/base/node.c | 6 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 54 +++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/kprobes.c | 38 ++---
kernel/sched/core.c | 2 +
kernel/sched/sched.h | 1 +
kernel/scs.c | 227 +++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
39 files changed, 630 insertions(+), 97 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

--
2.24.0.rc0.303.g954a862665-goog

2019-10-31 16:50:31

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v3 05/17] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 33 +++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 54 +++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/sched/sched.h | 1 +
kernel/scs.c | 169 +++++++++++++++++++++++++++++++++
11 files changed, 293 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 79be70bf2899..e6337314f8fb 100644
--- a/Makefile
+++ b/Makefile
@@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 5f8a5d84dbbe..5e34cbcd8d6a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK_VMAP
+ bool
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found from
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable reading and writing arbitrary memory may
+ be able to locate them and hijack control flow by modifying shadow
+ stacks that are not currently in use.
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..0b70aff3846a
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/* A random number to mark the end of the shadow stack. */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index 9e5cbe5eab7b..cbd40460e903 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index daad787fb795..313dbd44d576 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index bcdf53125210..3fa7ba64c62d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <linux/stackleak.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -834,6 +837,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -893,6 +898,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd05a378631a..e7faeb383008 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6013,6 +6013,8 @@ void init_idle(struct task_struct *idle, int cpu)
raw_spin_lock_irqsave(&idle->pi_lock, flags);
raw_spin_lock(&rq->lock);

+ scs_task_reset(idle);
+
__sched_fork(0, idle);
idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0db2c1b3361e..c153003a011c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -58,6 +58,7 @@
#include <linux/profile.h>
#include <linux/psi.h>
#include <linux/rcupdate_wait.h>
+#include <linux/scs.h>
#include <linux/security.h>
#include <linux/stop_machine.h>
#include <linux/suspend.h>
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..7c1a40020754
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * We allow architectures to use the shadow_call_stack field in
+ * struct thread_info to store the current shadow stack pointer
+ * during context switches.
+ *
+ * This allows the implementation to also clear the field when
+ * the task is active to avoid keeping pointers to the current
+ * task's shadow stack in memory. This can make it harder for an
+ * attacker to locate the shadow stack, but also requires us to
+ * compute the base address when needed.
+ *
+ * We assume the stack is aligned to SCS_SIZE.
+ */
+ return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Keep a cache of shadow stacks */
+#define SCS_CACHE_SIZE 2
+static DEFINE_PER_CPU(void *, scs_cache[SCS_CACHE_SIZE]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++) {
+ void *s;
+
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ return s;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ return __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == 0)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ return kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+}
+
+static inline void scs_free(void *s)
+{
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static inline unsigned long *scs_magic(struct task_struct *tsk)
+{
+ return (unsigned long *)(__scs_base(tsk) + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(struct task_struct *tsk)
+{
+ *scs_magic(tsk) = SCS_END_MAGIC;
+}
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ scs_set_magic(tsk);
+
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ return *scs_magic(tsk) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.24.0.rc0.303.g954a862665-goog

2019-10-31 16:50:45

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v3 10/17] arm64: disable kretprobes with SCS

With CONFIG_KRETPROBES, function return addresses are modified to
redirect control flow to kretprobe_trampoline. This is incompatible
with SCS.
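
A rough userspace sketch (not kernel code; all names below are made up
for illustration) of why the two do not compose: once the return path
reloads its target from a shadow copy, rewriting the saved return
address in the frame no longer redirects anything.

/*
 * Illustrative simulation only: a "call" records the return address in
 * both the regular frame and a shadow stack, a "kretprobe" patches only
 * the frame slot, and the "return" uses the shadow copy.
 */
#include <stdio.h>

#define SHADOW_DEPTH 128

static unsigned long shadow_stack[SHADOW_DEPTH];
static int shadow_top;

/* "call": save the return address in the frame and on the shadow stack */
static void fake_call(unsigned long *frame_slot, unsigned long ret_addr)
{
        *frame_slot = ret_addr;
        shadow_stack[shadow_top++] = ret_addr;
}

/* "kretprobe": patch only the frame slot, as a trampoline redirect would */
static void fake_kretprobe(unsigned long *frame_slot, unsigned long trampoline)
{
        *frame_slot = trampoline;
}

/* "return": an SCS-style epilogue ignores the frame slot entirely */
static unsigned long fake_return(void)
{
        return shadow_stack[--shadow_top];
}

int main(void)
{
        unsigned long frame_slot;

        fake_call(&frame_slot, 0x1000);      /* original return address */
        fake_kretprobe(&frame_slot, 0x2000); /* attempted redirect */

        printf("frame slot: %#lx, actual return target: %#lx\n",
               frame_slot, fake_return());
        /* prints 0x2000 vs 0x1000: the redirect never takes effect */
        return 0;
}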

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3f047afb982c..e7b57a8a5531 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -165,7 +165,7 @@ config ARM64
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
- select HAVE_KRETPROBES
+ select HAVE_KRETPROBES if !SHADOW_CALL_STACK
select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
--
2.24.0.rc0.303.g954a862665-goog

2019-10-31 16:50:52

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v3 11/17] arm64: disable function graph tracing with SCS

With CONFIG_FUNCTION_GRAPH_TRACER, function return addresses are
modified in ftrace_graph_caller and prepare_ftrace_return to redirect
control flow to ftrace_return_to_handler. This is incompatible with
SCS.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e7b57a8a5531..42867174920f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -148,7 +148,7 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
--
2.24.0.rc0.303.g954a862665-goog

2019-10-31 16:51:33

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v3 08/17] kprobes: fix compilation without CONFIG_KRETPROBES

kprobe_on_func_entry and arch_kprobe_on_func_entry need to be available
even if CONFIG_KRETPROBES is not selected.

Signed-off-by: Sami Tolvanen <[email protected]>
Acked-by: Masami Hiramatsu <[email protected]>
---
kernel/kprobes.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 53534aa258a6..b5e20a4669b8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1829,6 +1829,25 @@ unsigned long __weak arch_deref_entry_point(void *entry)
return (unsigned long)entry;
}

+bool __weak arch_kprobe_on_func_entry(unsigned long offset)
+{
+ return !offset;
+}
+
+bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
+{
+ kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
+
+ if (IS_ERR(kp_addr))
+ return false;
+
+ if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
+ !arch_kprobe_on_func_entry(offset))
+ return false;
+
+ return true;
+}
+
#ifdef CONFIG_KRETPROBES
/*
* This kprobe pre_handler is registered with every kretprobe. When probe
@@ -1885,25 +1904,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);

-bool __weak arch_kprobe_on_func_entry(unsigned long offset)
-{
- return !offset;
-}
-
-bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
-{
- kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
-
- if (IS_ERR(kp_addr))
- return false;
-
- if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
- !arch_kprobe_on_func_entry(offset))
- return false;
-
- return true;
-}
-
int register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
--
2.24.0.rc0.303.g954a862665-goog

2019-10-31 16:51:34

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v3 07/17] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 7780fc4e29ac..67c43af627d1 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -167,6 +167,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(tsk);
+ uintptr_t s = (uintptr_t)p;
+
+ while (p < end && *p)
+ p++;
+
+ return (uintptr_t)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s: highest shadow stack usage %lu bytes\n",
+ __func__, used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
return *scs_magic(tsk) != SCS_END_MAGIC;
@@ -181,6 +219,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.24.0.rc0.303.g954a862665-goog

2019-10-31 21:21:35

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v3 01/17] arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings

idmap_kpti_install_ng_mappings uses x18 as a temporary register, which
will result in a conflict when x18 is reserved. Use x16 and x17 instead
where needed.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/mm/proc.S | 63 ++++++++++++++++++++++----------------------
1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a1e0592d1fbc..fdabf40a83c8 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -250,15 +250,15 @@ ENTRY(idmap_kpti_install_ng_mappings)
/* We're the boot CPU. Wait for the others to catch up */
sevl
1: wfe
- ldaxr w18, [flag_ptr]
- eor w18, w18, num_cpus
- cbnz w18, 1b
+ ldaxr w17, [flag_ptr]
+ eor w17, w17, num_cpus
+ cbnz w17, 1b

/* We need to walk swapper, so turn off the MMU. */
pre_disable_mmu_workaround
- mrs x18, sctlr_el1
- bic x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ bic x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/* Everybody is enjoying the idmap, so we can rewrite swapper. */
@@ -281,9 +281,9 @@ skip_pgd:
isb

/* We're done: fire up the MMU again */
- mrs x18, sctlr_el1
- orr x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ orr x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/*
@@ -353,46 +353,47 @@ skip_pte:
b.ne do_pte
b next_pmd

+ .unreq cpu
+ .unreq num_cpus
+ .unreq swapper_pa
+ .unreq cur_pgdp
+ .unreq end_pgdp
+ .unreq pgd
+ .unreq cur_pudp
+ .unreq end_pudp
+ .unreq pud
+ .unreq cur_pmdp
+ .unreq end_pmdp
+ .unreq pmd
+ .unreq cur_ptep
+ .unreq end_ptep
+ .unreq pte
+
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
/* Uninstall swapper before surgery begins */
- __idmap_cpu_set_reserved_ttbr1 x18, x17
+ __idmap_cpu_set_reserved_ttbr1 x16, x17

/* Increment the flag to let the boot CPU we're ready */
-1: ldxr w18, [flag_ptr]
- add w18, w18, #1
- stxr w17, w18, [flag_ptr]
+1: ldxr w16, [flag_ptr]
+ add w16, w16, #1
+ stxr w17, w16, [flag_ptr]
cbnz w17, 1b

/* Wait for the boot CPU to finish messing around with swapper */
sevl
1: wfe
- ldxr w18, [flag_ptr]
- cbnz w18, 1b
+ ldxr w16, [flag_ptr]
+ cbnz w16, 1b

/* All done, act like nothing happened */
- offset_ttbr1 swapper_ttb, x18
+ offset_ttbr1 swapper_ttb, x16
msr ttbr1_el1, swapper_ttb
isb
ret

- .unreq cpu
- .unreq num_cpus
- .unreq swapper_pa
.unreq swapper_ttb
.unreq flag_ptr
- .unreq cur_pgdp
- .unreq end_pgdp
- .unreq pgd
- .unreq cur_pudp
- .unreq end_pudp
- .unreq pud
- .unreq cur_pmdp
- .unreq end_pmdp
- .unreq pmd
- .unreq cur_ptep
- .unreq end_ptep
- .unreq pte
ENDPROC(idmap_kpti_install_ng_mappings)
.popsection
#endif
--
2.24.0.rc0.303.g954a862665-goog

2019-11-01 03:59:34

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v3 08/17] kprobes: fix compilation without CONFIG_KRETPROBES

On Thu, Oct 31, 2019 at 09:46:28AM -0700, [email protected] wrote:
> kprobe_on_func_entry and arch_kprobe_on_func_entry need to be available
> even if CONFIG_KRETPROBES is not selected.
>
> Signed-off-by: Sami Tolvanen <[email protected]>

FWIW:

Reviewed-by: Kees Cook <[email protected]>

-Kees

> Acked-by: Masami Hiramatsu <[email protected]>
> ---
> kernel/kprobes.c | 38 +++++++++++++++++++-------------------
> 1 file changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 53534aa258a6..b5e20a4669b8 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -1829,6 +1829,25 @@ unsigned long __weak arch_deref_entry_point(void *entry)
> return (unsigned long)entry;
> }
>
> +bool __weak arch_kprobe_on_func_entry(unsigned long offset)
> +{
> + return !offset;
> +}
> +
> +bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
> +{
> + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
> +
> + if (IS_ERR(kp_addr))
> + return false;
> +
> + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
> + !arch_kprobe_on_func_entry(offset))
> + return false;
> +
> + return true;
> +}
> +
> #ifdef CONFIG_KRETPROBES
> /*
> * This kprobe pre_handler is registered with every kretprobe. When probe
> @@ -1885,25 +1904,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
> }
> NOKPROBE_SYMBOL(pre_handler_kretprobe);
>
> -bool __weak arch_kprobe_on_func_entry(unsigned long offset)
> -{
> - return !offset;
> -}
> -
> -bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
> -{
> - kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
> -
> - if (IS_ERR(kp_addr))
> - return false;
> -
> - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
> - !arch_kprobe_on_func_entry(offset))
> - return false;
> -
> - return true;
> -}
> -
> int register_kretprobe(struct kretprobe *rp)
> {
> int ret = 0;
> --
> 2.24.0.rc0.303.g954a862665-goog
>

--
Kees Cook

2019-11-01 04:00:26

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v3 05/17] add support for Clang's Shadow Call Stack (SCS)

On Thu, Oct 31, 2019 at 09:46:25AM -0700, [email protected] wrote:
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> Note that security guarantees in the kernel differ from the
> ones documented for user space. The kernel must store addresses
> of shadow stacks used by other tasks and interrupt handlers in
> memory, which means an attacker capable reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> Makefile | 6 ++
> arch/Kconfig | 33 +++++++
> include/linux/compiler-clang.h | 6 ++
> include/linux/compiler_types.h | 4 +
> include/linux/scs.h | 54 +++++++++++
> init/init_task.c | 8 ++
> kernel/Makefile | 1 +
> kernel/fork.c | 9 ++
> kernel/sched/core.c | 2 +
> kernel/sched/sched.h | 1 +
> kernel/scs.c | 169 +++++++++++++++++++++++++++++++++
> 11 files changed, 293 insertions(+)
> create mode 100644 include/linux/scs.h
> create mode 100644 kernel/scs.c
>
> diff --git a/Makefile b/Makefile
> index 79be70bf2899..e6337314f8fb 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
> KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
> endif
>
> +ifdef CONFIG_SHADOW_CALL_STACK
> +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> +export CC_FLAGS_SCS
> +endif
> +
> # arch Makefile may override CC so keep this after arch Makefile is included
> NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 5f8a5d84dbbe..5e34cbcd8d6a 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
> about 20% of all kernel functions, which increases the kernel code
> size by about 2%.
>
> +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> + bool
> + help
> + An architecture should select this if it supports Clang's Shadow
> + Call Stack, has asm/scs.h, and implements runtime support for shadow
> + stack switching.
> +
> +config SHADOW_CALL_STACK_VMAP
> + bool
> + depends on SHADOW_CALL_STACK
> + help
> + Use virtually mapped shadow call stacks. Selecting this option
> + provides better stack exhaustion protection, but increases per-thread
> + memory consumption as a full page is allocated for each shadow stack.
> +
> +config SHADOW_CALL_STACK
> + bool "Clang Shadow Call Stack"
> + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> + help
> + This option enables Clang's Shadow Call Stack, which uses a
> + shadow stack to protect function return addresses from being
> + overwritten by an attacker. More information can be found from
> + Clang's documentation:
> +
> + https://clang.llvm.org/docs/ShadowCallStack.html
> +
> + Note that security guarantees in the kernel differ from the ones
> + documented for user space. The kernel must store addresses of shadow
> + stacks used by other tasks and interrupt handlers in memory, which
> + means an attacker capable reading and writing arbitrary memory may
> + be able to locate them and hijack control flow by modifying shadow
> + stacks that are not currently in use.
> +
> config HAVE_ARCH_WITHIN_STACK_FRAMES
> bool
> help
> diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
> index 333a6695a918..18fc4d29ef27 100644
> --- a/include/linux/compiler-clang.h
> +++ b/include/linux/compiler-clang.h
> @@ -42,3 +42,9 @@
> * compilers, like ICC.
> */
> #define barrier() __asm__ __volatile__("" : : : "memory")
> +
> +#if __has_feature(shadow_call_stack)
> +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
> +#else
> +# define __noscs
> +#endif
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 72393a8c1a6c..be5d5be4b1ae 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -202,6 +202,10 @@ struct ftrace_likely_data {
> # define randomized_struct_fields_end
> #endif
>
> +#ifndef __noscs
> +# define __noscs
> +#endif
> +
> #ifndef asm_volatile_goto
> #define asm_volatile_goto(x...) asm goto(x)
> #endif
> diff --git a/include/linux/scs.h b/include/linux/scs.h
> new file mode 100644
> index 000000000000..0b70aff3846a
> --- /dev/null
> +++ b/include/linux/scs.h
> @@ -0,0 +1,54 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#ifndef _LINUX_SCS_H
> +#define _LINUX_SCS_H
> +
> +#include <linux/gfp.h>
> +#include <linux/sched.h>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +
> +/*
> + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
> + * architecture) provided ~40% safety margin on stack usage while keeping
> + * memory allocation overhead reasonable.
> + */
> +#define SCS_SIZE 1024
> +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
> +
> +/* A random number to mark the end of the shadow stack. */
> +#define SCS_END_MAGIC 0xaf0194819b1635f6UL

Is 0xaf.... non-canonical for arm64? While "random", it should also
likely be an "impossible" value to find on the call stack.

Otherwise, all looks great. :)

Reviewed-by: Kees Cook <[email protected]>

-Kees

> +
> +#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s)
> +{
> + task_scs(tsk) = s;
> +}
> +
> +extern void scs_init(void);
> +extern void scs_task_reset(struct task_struct *tsk);
> +extern int scs_prepare(struct task_struct *tsk, int node);
> +extern bool scs_corrupted(struct task_struct *tsk);
> +extern void scs_release(struct task_struct *tsk);
> +
> +#else /* CONFIG_SHADOW_CALL_STACK */
> +
> +#define task_scs(tsk) NULL
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s) {}
> +static inline void scs_init(void) {}
> +static inline void scs_task_reset(struct task_struct *tsk) {}
> +static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
> +static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
> +static inline void scs_release(struct task_struct *tsk) {}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK */
> +
> +#endif /* _LINUX_SCS_H */
> diff --git a/init/init_task.c b/init/init_task.c
> index 9e5cbe5eab7b..cbd40460e903 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -11,6 +11,7 @@
> #include <linux/mm.h>
> #include <linux/audit.h>
> #include <linux/numa.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <linux/uaccess.h>
> @@ -184,6 +185,13 @@ struct task_struct init_task
> };
> EXPORT_SYMBOL(init_task);
>
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
> + __aligned(SCS_SIZE) = {
> + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
> +};
> +#endif
> +
> /*
> * Initial thread structure. Alignment of this is handled by a special
> * linker map entry.
> diff --git a/kernel/Makefile b/kernel/Makefile
> index daad787fb795..313dbd44d576 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
> obj-$(CONFIG_IRQ_WORK) += irq_work.o
> obj-$(CONFIG_CPU_PM) += cpu_pm.o
> obj-$(CONFIG_BPF) += bpf/
> +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
>
> obj-$(CONFIG_PERF_EVENTS) += events/
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index bcdf53125210..3fa7ba64c62d 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -94,6 +94,7 @@
> #include <linux/livepatch.h>
> #include <linux/thread_info.h>
> #include <linux/stackleak.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <asm/pgalloc.h>
> @@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)
>
> void free_task(struct task_struct *tsk)
> {
> + scs_release(tsk);
> +
> #ifndef CONFIG_THREAD_INFO_IN_TASK
> /*
> * The task is finally done with both the stack and thread_info,
> @@ -834,6 +837,8 @@ void __init fork_init(void)
> NULL, free_vm_stack_cache);
> #endif
>
> + scs_init();
> +
> lockdep_init_task(&init_task);
> uprobes_init();
> }
> @@ -893,6 +898,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
> if (err)
> goto free_stack;
>
> + err = scs_prepare(tsk, node);
> + if (err)
> + goto free_stack;
> +
> #ifdef CONFIG_SECCOMP
> /*
> * We must handle setting up seccomp filters once we're under
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index dd05a378631a..e7faeb383008 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6013,6 +6013,8 @@ void init_idle(struct task_struct *idle, int cpu)
> raw_spin_lock_irqsave(&idle->pi_lock, flags);
> raw_spin_lock(&rq->lock);
>
> + scs_task_reset(idle);
> +
> __sched_fork(0, idle);
> idle->state = TASK_RUNNING;
> idle->se.exec_start = sched_clock();
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 0db2c1b3361e..c153003a011c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -58,6 +58,7 @@
> #include <linux/profile.h>
> #include <linux/psi.h>
> #include <linux/rcupdate_wait.h>
> +#include <linux/scs.h>
> #include <linux/security.h>
> #include <linux/stop_machine.h>
> #include <linux/suspend.h>
> diff --git a/kernel/scs.c b/kernel/scs.c
> new file mode 100644
> index 000000000000..7c1a40020754
> --- /dev/null
> +++ b/kernel/scs.c
> @@ -0,0 +1,169 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#include <linux/cpuhotplug.h>
> +#include <linux/mm.h>
> +#include <linux/mmzone.h>
> +#include <linux/scs.h>
> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +#include <asm/scs.h>
> +
> +static inline void *__scs_base(struct task_struct *tsk)
> +{
> + /*
> + * We allow architectures to use the shadow_call_stack field in
> + * struct thread_info to store the current shadow stack pointer
> + * during context switches.
> + *
> + * This allows the implementation to also clear the field when
> + * the task is active to avoid keeping pointers to the current
> + * task's shadow stack in memory. This can make it harder for an
> + * attacker to locate the shadow stack, but also requires us to
> + * compute the base address when needed.
> + *
> + * We assume the stack is aligned to SCS_SIZE.
> + */
> + return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
> +}
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
> +
> +/* Keep a cache of shadow stacks */
> +#define SCS_CACHE_SIZE 2
> +static DEFINE_PER_CPU(void *, scs_cache[SCS_CACHE_SIZE]);
> +
> +static void *scs_alloc(int node)
> +{
> + int i;
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + void *s;
> +
> + s = this_cpu_xchg(scs_cache[i], NULL);
> + if (s) {
> + memset(s, 0, SCS_SIZE);
> + return s;
> + }
> + }
> +
> + /*
> + * We allocate a full page for the shadow stack, which should be
> + * more than we need. Check the assumption nevertheless.
> + */
> + BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
> +
> + return __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> + VMALLOC_START, VMALLOC_END,
> + GFP_SCS, PAGE_KERNEL, 0,
> + node, __builtin_return_address(0));
> +}
> +
> +static void scs_free(void *s)
> +{
> + int i;
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++)
> + if (this_cpu_cmpxchg(scs_cache[i], 0, s) == 0)
> + return;
> +
> + vfree_atomic(s);
> +}
> +
> +static int scs_cleanup(unsigned int cpu)
> +{
> + int i;
> + void **cache = per_cpu_ptr(scs_cache, cpu);
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + vfree(cache[i]);
> + cache[i] = NULL;
> + }
> +
> + return 0;
> +}
> +
> +void __init scs_init(void)
> +{
> + cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
> + scs_cleanup);
> +}
> +
> +#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static struct kmem_cache *scs_cache;
> +
> +static inline void *scs_alloc(int node)
> +{
> + return kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
> +}
> +
> +static inline void scs_free(void *s)
> +{
> + kmem_cache_free(scs_cache, s);
> +}
> +
> +void __init scs_init(void)
> +{
> + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
> + 0, NULL);
> + WARN_ON(!scs_cache);
> +}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static inline unsigned long *scs_magic(struct task_struct *tsk)
> +{
> + return (unsigned long *)(__scs_base(tsk) + SCS_SIZE) - 1;
> +}
> +
> +static inline void scs_set_magic(struct task_struct *tsk)
> +{
> + *scs_magic(tsk) = SCS_END_MAGIC;
> +}
> +
> +void scs_task_reset(struct task_struct *tsk)
> +{
> + /*
> + * Reset the shadow stack to the base address in case the task
> + * is reused.
> + */
> + task_set_scs(tsk, __scs_base(tsk));
> +}
> +
> +int scs_prepare(struct task_struct *tsk, int node)
> +{
> + void *s;
> +
> + s = scs_alloc(node);
> + if (!s)
> + return -ENOMEM;
> +
> + task_set_scs(tsk, s);
> + scs_set_magic(tsk);
> +
> + return 0;
> +}
> +
> +bool scs_corrupted(struct task_struct *tsk)
> +{
> + return *scs_magic(tsk) != SCS_END_MAGIC;
> +}
> +
> +void scs_release(struct task_struct *tsk)
> +{
> + void *s;
> +
> + s = __scs_base(tsk);
> + if (!s)
> + return;
> +
> + WARN_ON(scs_corrupted(tsk));
> +
> + task_set_scs(tsk, NULL);
> + scs_free(s);
> +}
> --
> 2.24.0.rc0.303.g954a862665-goog
>

--
Kees Cook

2019-11-01 05:13:58

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v3 07/17] scs: add support for stack usage debugging

On Thu, Oct 31, 2019 at 09:46:27AM -0700, [email protected] wrote:
> Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks.

Did I miss it, or is there no Kconfig section for this? I just realized
I can't find it. I was going to say "this commit log should explain
why/when this option is used", but then figured it might be explained in
the Kconfig ... but I couldn't find it. ;)

-Kees

>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 39 insertions(+)
>
> diff --git a/kernel/scs.c b/kernel/scs.c
> index 7780fc4e29ac..67c43af627d1 100644
> --- a/kernel/scs.c
> +++ b/kernel/scs.c
> @@ -167,6 +167,44 @@ int scs_prepare(struct task_struct *tsk, int node)
> return 0;
> }
>
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> +static inline unsigned long scs_used(struct task_struct *tsk)
> +{
> + unsigned long *p = __scs_base(tsk);
> + unsigned long *end = scs_magic(tsk);
> + uintptr_t s = (uintptr_t)p;
> +
> + while (p < end && *p)
> + p++;
> +
> + return (uintptr_t)p - s;
> +}
> +
> +static void scs_check_usage(struct task_struct *tsk)
> +{
> + static DEFINE_SPINLOCK(lock);
> + static unsigned long highest;
> + unsigned long used = scs_used(tsk);
> +
> + if (used <= highest)
> + return;
> +
> + spin_lock(&lock);
> +
> + if (used > highest) {
> + pr_info("%s: highest shadow stack usage %lu bytes\n",
> + __func__, used);
> + highest = used;
> + }
> +
> + spin_unlock(&lock);
> +}
> +#else
> +static inline void scs_check_usage(struct task_struct *tsk)
> +{
> +}
> +#endif
> +
> bool scs_corrupted(struct task_struct *tsk)
> {
> return *scs_magic(tsk) != SCS_END_MAGIC;
> @@ -181,6 +219,7 @@ void scs_release(struct task_struct *tsk)
> return;
>
> WARN_ON(scs_corrupted(tsk));
> + scs_check_usage(tsk);
>
> scs_account(tsk, -1);
> task_set_scs(tsk, NULL);
> --
> 2.24.0.rc0.303.g954a862665-goog
>

--
Kees Cook

2019-11-01 05:23:02

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v3 10/17] arm64: disable kretprobes with SCS

On Thu, Oct 31, 2019 at 09:46:30AM -0700, [email protected] wrote:
> With CONFIG_KRETPROBES, function return addresses are modified to
> redirect control flow to kretprobe_trampoline. This is incompatible
> with SCS.
>
> Signed-off-by: Sami Tolvanen <[email protected]>

Reviewed-by: Kees Cook <[email protected]>

-Kees

> ---
> arch/arm64/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 3f047afb982c..e7b57a8a5531 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -165,7 +165,7 @@ config ARM64
> select HAVE_STACKPROTECTOR
> select HAVE_SYSCALL_TRACEPOINTS
> select HAVE_KPROBES
> - select HAVE_KRETPROBES
> + select HAVE_KRETPROBES if !SHADOW_CALL_STACK
> select HAVE_GENERIC_VDSO
> select IOMMU_DMA if IOMMU_SUPPORT
> select IRQ_DOMAIN
> --
> 2.24.0.rc0.303.g954a862665-goog
>

--
Kees Cook

2019-11-01 05:27:47

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v3 11/17] arm64: disable function graph tracing with SCS

On Thu, Oct 31, 2019 at 09:46:31AM -0700, [email protected] wrote:
> With CONFIG_FUNCTION_GRAPH_TRACER, function return addresses are
> modified in ftrace_graph_caller and prepare_ftrace_return to redirect
> control flow to ftrace_return_to_handler. This is incompatible with
> SCS.

IIRC, the argument was to disable these on a per-arch basis instead of
doing it as a "depends on !SHADOW_CALL_STACK" in the top-level function
graph tracer Kconfig? (I'm just thinking ahead to doing this again for
other architectures, though, I guess, there is much more work than just
that for, say, x86.)

Regardless:

Reviewed-by: Kees Cook <[email protected]>

-Kees


>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index e7b57a8a5531..42867174920f 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -148,7 +148,7 @@ config ARM64
> select HAVE_FTRACE_MCOUNT_RECORD
> select HAVE_FUNCTION_TRACER
> select HAVE_FUNCTION_ERROR_INJECTION
> - select HAVE_FUNCTION_GRAPH_TRACER
> + select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
> select HAVE_GCC_PLUGINS
> select HAVE_HW_BREAKPOINT if PERF_EVENTS
> select HAVE_IRQ_TIME_ACCOUNTING
> --
> 2.24.0.rc0.303.g954a862665-goog
>

--
Kees Cook

2019-11-01 16:35:04

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v3 07/17] scs: add support for stack usage debugging

On Thu, Oct 31, 2019 at 8:55 PM Kees Cook <[email protected]> wrote:
>
> On Thu, Oct 31, 2019 at 09:46:27AM -0700, [email protected] wrote:
> > Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks.
>
> Did I miss it, or is there no Kconfig section for this? I just realized
> I can't find it. I was going to say "this commit log should explain
> why/when this option is used", but then figured it might be explained in
> the Kconfig ... but I couldn't find it. ;)

It's in lib/Kconfig.debug. But yes, I will add a commit message in v4.

Sami

2019-11-01 17:21:18

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v3 05/17] add support for Clang's Shadow Call Stack (SCS)

On Thu, Oct 31, 2019 at 8:51 PM Kees Cook <[email protected]> wrote:
> > +/* A random number to mark the end of the shadow stack. */
> > +#define SCS_END_MAGIC 0xaf0194819b1635f6UL
>
> Is 0xaf.... non-canonical for arm64? While "random", it should also
> likely be an "impossible" value to find on the call stack.

Agreed, and yes, this is non-canonical for arm64 and AFAIK all 64-bit
architectures the kernel supports. I'll add a note about it.
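
For reference, with the usual 48-bit VA configuration a canonical
pointer has bits [63:48] either all zero (user) or all ones (kernel),
so a quick userspace sketch of the check looks like this (the helper
below is just for illustration, not something from the patches):

/*
 * Sketch: show that the shadow stack end marker cannot be a valid
 * 48-bit canonical address, so it can never collide with a real
 * return address stored on the shadow stack.
 */
#include <stdio.h>

#define SCS_END_MAGIC 0xaf0194819b1635f6ULL

static int is_canonical_48(unsigned long long addr)
{
        unsigned long long top = addr >> 48;

        return top == 0 || top == 0xffff;
}

int main(void)
{
        printf("%#llx canonical: %s\n", SCS_END_MAGIC,
               is_canonical_48(SCS_END_MAGIC) ? "yes" : "no");
        return 0;
}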

Sami

2019-11-01 19:05:48

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v3 07/17] scs: add support for stack usage debugging

On Fri, Nov 01, 2019 at 09:32:54AM -0700, Sami Tolvanen wrote:
> On Thu, Oct 31, 2019 at 8:55 PM Kees Cook <[email protected]> wrote:
> >
> > On Thu, Oct 31, 2019 at 09:46:27AM -0700, [email protected] wrote:
> > > Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks.
> >
> > Did I miss it, or is there no Kconfig section for this? I just realized
> > I can't find it. I was going to say "this commit log should explain
> > why/when this option is used", but then figured it might be explained in
> > the Kconfig ... but I couldn't find it. ;)
>
> It's in lib/Kconfig.debug. But yes, I will add a commit message in v4.

Oh duh -- it's an existing option. Cool; I'm all good. :)

--
Kees Cook

2019-11-01 20:35:06

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v3 11/17] arm64: disable function graph tracing with SCS

On Thu, Oct 31, 2019 at 8:58 PM Kees Cook <[email protected]> wrote:
> IIRC, the argument was to disable these on a per-arch basis instead of
> doing it as a "depends on !SHADOW_CALL_STACK" in the top-level function
> graph tracer Kconfig?

Yes, that's correct.

> (I'm just thinking ahead to doing this again for
> other architectures, though, I guess, there is much more work than just
> that for, say, x86.)

We can always change this later if needed, and possibly figure out how
to make function graph tracing and kretprobes work with SCS.

Sami

2019-11-01 22:13:03

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 00/17] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer. Because of this, the series includes
patches from Ard to remove x18 usage from assembly code.

With -fsanitize=shadow-call-stack, the compiler injects
instructions into all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
to alter control flow, such as function graph tracing and
kretprobes, although it may be possible to later change these
features to modify the shadow stack instead. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.
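
As a rough sketch of that memory cost (not from the patches; the page
size and task count below are assumptions for illustration only),
per-task overhead is SCS_SIZE bytes with the slab-backed stacks, or a
full page per task with the VMAP option:

/* Back-of-the-envelope estimate of shadow stack memory overhead. */
#include <stdio.h>

#define SCS_SIZE   1024UL  /* from include/linux/scs.h */
#define PAGE_SIZE  4096UL  /* assumed: typical arm64 4 KiB pages */

int main(void)
{
        unsigned long tasks = 2000;  /* arbitrary example task count */

        printf("%lu tasks: %lu KiB (slab), %lu KiB (vmap)\n",
               tasks, tasks * SCS_SIZE / 1024, tasks * PAGE_SIZE / 1024);
        return 0;
}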

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Ard Biesheuvel (3):
arm64/lib: copy_page: avoid x18 register in assembler code
arm64: kvm: stop treating register x18 as caller save
arm64: kernel: avoid x18 in __cpu_soft_restart

Sami Tolvanen (14):
arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
kprobes: fix compilation without CONFIG_KRETPROBES
arm64: kprobes: fix kprobes without CONFIG_KRETPROBES
arm64: disable kretprobes with SCS
arm64: disable function graph tracing with SCS
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack

Makefile | 6 +
arch/Kconfig | 33 ++++
arch/arm64/Kconfig | 9 +-
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/scs.h | 37 +++++
arch/arm64/include/asm/stacktrace.h | 4 +
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/cpu-reset.S | 4 +-
arch/arm64/kernel/efi-rt-wrapper.S | 7 +-
arch/arm64/kernel/entry.S | 28 ++++
arch/arm64/kernel/head.S | 9 ++
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/probes/kprobes.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 39 +++++
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/kvm/hyp/Makefile | 3 +
arch/arm64/kvm/hyp/entry.S | 41 +++--
arch/arm64/lib/copy_page.S | 38 ++---
arch/arm64/mm/proc.S | 73 +++++----
drivers/base/node.c | 6 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 57 +++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/kprobes.c | 38 ++---
kernel/sched/core.c | 2 +
kernel/sched/sched.h | 1 +
kernel/scs.c | 227 +++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
39 files changed, 634 insertions(+), 97 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: 0dbe6cb8f7e05bc9611602ef45980a6c57b245a3
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:09

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 01/17] arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings

idmap_kpti_install_ng_mappings uses x18 as a temporary register, which
will result in a conflict when x18 is reserved. Use x16 and x17 instead
where needed.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/mm/proc.S | 63 ++++++++++++++++++++++----------------------
1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a1e0592d1fbc..fdabf40a83c8 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -250,15 +250,15 @@ ENTRY(idmap_kpti_install_ng_mappings)
/* We're the boot CPU. Wait for the others to catch up */
sevl
1: wfe
- ldaxr w18, [flag_ptr]
- eor w18, w18, num_cpus
- cbnz w18, 1b
+ ldaxr w17, [flag_ptr]
+ eor w17, w17, num_cpus
+ cbnz w17, 1b

/* We need to walk swapper, so turn off the MMU. */
pre_disable_mmu_workaround
- mrs x18, sctlr_el1
- bic x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ bic x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/* Everybody is enjoying the idmap, so we can rewrite swapper. */
@@ -281,9 +281,9 @@ skip_pgd:
isb

/* We're done: fire up the MMU again */
- mrs x18, sctlr_el1
- orr x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ orr x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/*
@@ -353,46 +353,47 @@ skip_pte:
b.ne do_pte
b next_pmd

+ .unreq cpu
+ .unreq num_cpus
+ .unreq swapper_pa
+ .unreq cur_pgdp
+ .unreq end_pgdp
+ .unreq pgd
+ .unreq cur_pudp
+ .unreq end_pudp
+ .unreq pud
+ .unreq cur_pmdp
+ .unreq end_pmdp
+ .unreq pmd
+ .unreq cur_ptep
+ .unreq end_ptep
+ .unreq pte
+
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
/* Uninstall swapper before surgery begins */
- __idmap_cpu_set_reserved_ttbr1 x18, x17
+ __idmap_cpu_set_reserved_ttbr1 x16, x17

/* Increment the flag to let the boot CPU we're ready */
-1: ldxr w18, [flag_ptr]
- add w18, w18, #1
- stxr w17, w18, [flag_ptr]
+1: ldxr w16, [flag_ptr]
+ add w16, w16, #1
+ stxr w17, w16, [flag_ptr]
cbnz w17, 1b

/* Wait for the boot CPU to finish messing around with swapper */
sevl
1: wfe
- ldxr w18, [flag_ptr]
- cbnz w18, 1b
+ ldxr w16, [flag_ptr]
+ cbnz w16, 1b

/* All done, act like nothing happened */
- offset_ttbr1 swapper_ttb, x18
+ offset_ttbr1 swapper_ttb, x16
msr ttbr1_el1, swapper_ttb
isb
ret

- .unreq cpu
- .unreq num_cpus
- .unreq swapper_pa
.unreq swapper_ttb
.unreq flag_ptr
- .unreq cur_pgdp
- .unreq end_pgdp
- .unreq pgd
- .unreq cur_pudp
- .unreq end_pudp
- .unreq pud
- .unreq cur_pmdp
- .unreq end_pmdp
- .unreq pmd
- .unreq cur_ptep
- .unreq end_ptep
- .unreq pte
ENDPROC(idmap_kpti_install_ng_mappings)
.popsection
#endif
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:12

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 02/17] arm64/lib: copy_page: avoid x18 register in assembler code

From: Ard Biesheuvel <[email protected]>

Register x18 will no longer be used as a caller save register in the
future, so stop using it in the copy_page() code.

Link: https://patchwork.kernel.org/patch/9836869/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: changed the offset and bias to be explicit]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/lib/copy_page.S | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index bbb8562396af..290dd3c5266c 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -34,45 +34,45 @@ alternative_else_nop_endif
ldp x14, x15, [x1, #96]
ldp x16, x17, [x1, #112]

- mov x18, #(PAGE_SIZE - 128)
+ add x0, x0, #256
add x1, x1, #128
1:
- subs x18, x18, #128
+ tst x0, #(PAGE_SIZE - 1)

alternative_if ARM64_HAS_NO_HW_PREFETCH
prfm pldl1strm, [x1, #384]
alternative_else_nop_endif

- stnp x2, x3, [x0]
+ stnp x2, x3, [x0, #-256]
ldp x2, x3, [x1]
- stnp x4, x5, [x0, #16]
+ stnp x4, x5, [x0, #16 - 256]
ldp x4, x5, [x1, #16]
- stnp x6, x7, [x0, #32]
+ stnp x6, x7, [x0, #32 - 256]
ldp x6, x7, [x1, #32]
- stnp x8, x9, [x0, #48]
+ stnp x8, x9, [x0, #48 - 256]
ldp x8, x9, [x1, #48]
- stnp x10, x11, [x0, #64]
+ stnp x10, x11, [x0, #64 - 256]
ldp x10, x11, [x1, #64]
- stnp x12, x13, [x0, #80]
+ stnp x12, x13, [x0, #80 - 256]
ldp x12, x13, [x1, #80]
- stnp x14, x15, [x0, #96]
+ stnp x14, x15, [x0, #96 - 256]
ldp x14, x15, [x1, #96]
- stnp x16, x17, [x0, #112]
+ stnp x16, x17, [x0, #112 - 256]
ldp x16, x17, [x1, #112]

add x0, x0, #128
add x1, x1, #128

- b.gt 1b
+ b.ne 1b

- stnp x2, x3, [x0]
- stnp x4, x5, [x0, #16]
- stnp x6, x7, [x0, #32]
- stnp x8, x9, [x0, #48]
- stnp x10, x11, [x0, #64]
- stnp x12, x13, [x0, #80]
- stnp x14, x15, [x0, #96]
- stnp x16, x17, [x0, #112]
+ stnp x2, x3, [x0, #-256]
+ stnp x4, x5, [x0, #16 - 256]
+ stnp x6, x7, [x0, #32 - 256]
+ stnp x8, x9, [x0, #48 - 256]
+ stnp x10, x11, [x0, #64 - 256]
+ stnp x12, x13, [x0, #80 - 256]
+ stnp x14, x15, [x0, #96 - 256]
+ stnp x16, x17, [x0, #112 - 256]

ret
ENDPROC(copy_page)
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:19

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 04/17] arm64: kernel: avoid x18 in __cpu_soft_restart

From: Ard Biesheuvel <[email protected]>

The code in __cpu_soft_restart() uses x18 as an arbitrary temp register,
which will shortly be disallowed. So use x8 instead.

Link: https://patchwork.kernel.org/patch/9836877/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: updated commit message]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/cpu-reset.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6ea337d464c4..32c7bf858dd9 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart)
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return

-1: mov x18, x1 // entry
+1: mov x8, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
- br x18
+ br x8
ENDPROC(__cpu_soft_restart)

.popsection
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:26

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 05/17] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.
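
As a rough illustration of the __noscs attribute added in
compiler-clang.h (the function below and the __has_feature fallback are
only a sketch, not part of the patch), code that must not be
instrumented can opt out like this:

/*
 * Minimal sketch: functions marked __noscs are compiled without
 * -fsanitize=shadow-call-stack instrumentation. The fallback mirrors
 * the compiler_types.h default so this also builds with compilers
 * that lack the feature.
 */
#include <stdio.h>

#ifndef __has_feature
# define __has_feature(x) 0
#endif

#if __has_feature(shadow_call_stack)
# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
#else
# define __noscs
#endif

/* Hypothetical example: code that runs before a shadow stack exists. */
static void __noscs early_setup(void)
{
        /* no shadow stack pushes/pops are emitted for this function */
}

int main(void)
{
        early_setup();
        printf("early_setup() ran without SCS instrumentation\n");
        return 0;
}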

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 33 +++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 57 +++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/sched/sched.h | 1 +
kernel/scs.c | 169 +++++++++++++++++++++++++++++++++
11 files changed, 296 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 79be70bf2899..e6337314f8fb 100644
--- a/Makefile
+++ b/Makefile
@@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 5f8a5d84dbbe..5e34cbcd8d6a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK_VMAP
+ bool
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found from
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable of reading and writing arbitrary memory may
+ be able to locate them and hijack control flow by modifying shadow
+ stacks that are not currently in use.
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..bd5ef4278b91
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index 9e5cbe5eab7b..cbd40460e903 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index daad787fb795..313dbd44d576 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index bcdf53125210..3fa7ba64c62d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <linux/stackleak.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -834,6 +837,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -893,6 +898,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd05a378631a..e7faeb383008 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6013,6 +6013,8 @@ void init_idle(struct task_struct *idle, int cpu)
raw_spin_lock_irqsave(&idle->pi_lock, flags);
raw_spin_lock(&rq->lock);

+ scs_task_reset(idle);
+
__sched_fork(0, idle);
idle->state = TASK_RUNNING;
idle->se.exec_start = sched_clock();
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 0db2c1b3361e..c153003a011c 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -58,6 +58,7 @@
#include <linux/profile.h>
#include <linux/psi.h>
#include <linux/rcupdate_wait.h>
+#include <linux/scs.h>
#include <linux/security.h>
#include <linux/stop_machine.h>
#include <linux/suspend.h>
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..7c1a40020754
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * We allow architectures to use the shadow_call_stack field in
+ * struct thread_info to store the current shadow stack pointer
+ * during context switches.
+ *
+ * This allows the implementation to also clear the field when
+ * the task is active to avoid keeping pointers to the current
+ * task's shadow stack in memory. This can make it harder for an
+ * attacker to locate the shadow stack, but also requires us to
+ * compute the base address when needed.
+ *
+ * We assume the stack is aligned to SCS_SIZE.
+ */
+ return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Keep a cache of shadow stacks */
+#define SCS_CACHE_SIZE 2
+static DEFINE_PER_CPU(void *, scs_cache[SCS_CACHE_SIZE]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++) {
+ void *s;
+
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ return s;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ return __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == 0)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < SCS_CACHE_SIZE; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ return kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+}
+
+static inline void scs_free(void *s)
+{
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static inline unsigned long *scs_magic(struct task_struct *tsk)
+{
+ return (unsigned long *)(__scs_base(tsk) + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(struct task_struct *tsk)
+{
+ *scs_magic(tsk) = SCS_END_MAGIC;
+}
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ scs_set_magic(tsk);
+
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ return *scs_magic(tsk) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:32

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 03/17] arm64: kvm: stop treating register x18 as caller save

From: Ard Biesheuvel <[email protected]>

In preparation of reserving x18, stop treating it as caller save in
the KVM guest entry/exit code. Currently, the code assumes there is
no need to preserve it for the host, given that it would have been
assumed clobbered anyway by the function call to __guest_enter().
Instead, preserve its value and restore it upon return.

Link: https://patchwork.kernel.org/patch/9836891/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: updated commit message, switched from x18 to x29 for the guest context]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kvm/hyp/entry.S | 41 +++++++++++++++++++-------------------
1 file changed, 20 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e5cc8d66bf53..c3c2d842c609 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -23,6 +23,7 @@
.pushsection .hyp.text, "ax"

.macro save_callee_saved_regs ctxt
+ str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -32,6 +33,8 @@
.endm

.macro restore_callee_saved_regs ctxt
+ // We assume \ctxt is not x18-x28
+ ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -48,7 +51,7 @@ ENTRY(__guest_enter)
// x0: vcpu
// x1: host context
// x2-x17: clobbered by macros
- // x18: guest context
+ // x29: guest context

// Store the host regs
save_callee_saved_regs x1
@@ -67,31 +70,28 @@ alternative_else_nop_endif
ret

1:
- add x18, x0, #VCPU_CONTEXT
+ add x29, x0, #VCPU_CONTEXT

// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
// as it may cause Pointer Authentication key signing mismatch errors
// when this feature is enabled for kernel code.
- ptrauth_switch_to_guest x18, x0, x1, x2
+ ptrauth_switch_to_guest x29, x0, x1, x2

// Restore guest regs x0-x17
- ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
- ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
- ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
- ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
- ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
- ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
- ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
- ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
- ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
-
- // Restore guest regs x19-x29, lr
- restore_callee_saved_regs x18
-
- // Restore guest reg x18
- ldr x18, [x18, #CPU_XREG_OFFSET(18)]
+ ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
+ ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
+ ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
+ ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
+ ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)]
+ ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)]
+ ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)]
+ ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)]
+ ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
+
+ // Restore guest regs x18-x29, lr
+ restore_callee_saved_regs x29

// Do not touch any register after this!
eret
@@ -114,7 +114,7 @@ ENTRY(__guest_exit)
// Retrieve the guest regs x0-x1 from the stack
ldp x2, x3, [sp], #16 // x0, x1

- // Store the guest regs x0-x1 and x4-x18
+ // Store the guest regs x0-x1 and x4-x17
stp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
stp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
@@ -123,9 +123,8 @@ ENTRY(__guest_exit)
stp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
- str x18, [x1, #CPU_XREG_OFFSET(18)]

- // Store the guest regs x19-x29, lr
+ // Store the guest regs x18-x29, lr
save_callee_saved_regs x1

get_host_ctxt x2, x3
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:32

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 07/17] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
also prints out the highest shadow stack usage per process.

Signed-off-by: Sami Tolvanen <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 7780fc4e29ac..67c43af627d1 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -167,6 +167,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(tsk);
+ uintptr_t s = (uintptr_t)p;
+
+ while (p < end && *p)
+ p++;
+
+ return (uintptr_t)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s: highest shadow stack usage %lu bytes\n",
+ __func__, used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
return *scs_magic(tsk) != SCS_END_MAGIC;
@@ -181,6 +219,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:37

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 08/17] kprobes: fix compilation without CONFIG_KRETPROBES

kprobe_on_func_entry and arch_kprobe_on_func_entry need to be available
even if CONFIG_KRETPROBES is not selected.

Signed-off-by: Sami Tolvanen <[email protected]>
Acked-by: Masami Hiramatsu <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/kprobes.c | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 53534aa258a6..b5e20a4669b8 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1829,6 +1829,25 @@ unsigned long __weak arch_deref_entry_point(void *entry)
return (unsigned long)entry;
}

+bool __weak arch_kprobe_on_func_entry(unsigned long offset)
+{
+ return !offset;
+}
+
+bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
+{
+ kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
+
+ if (IS_ERR(kp_addr))
+ return false;
+
+ if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
+ !arch_kprobe_on_func_entry(offset))
+ return false;
+
+ return true;
+}
+
#ifdef CONFIG_KRETPROBES
/*
* This kprobe pre_handler is registered with every kretprobe. When probe
@@ -1885,25 +1904,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
}
NOKPROBE_SYMBOL(pre_handler_kretprobe);

-bool __weak arch_kprobe_on_func_entry(unsigned long offset)
-{
- return !offset;
-}
-
-bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
-{
- kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
-
- if (IS_ERR(kp_addr))
- return false;
-
- if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
- !arch_kprobe_on_func_entry(offset))
- return false;
-
- return true;
-}
-
int register_kretprobe(struct kretprobe *rp)
{
int ret = 0;
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:40

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 09/17] arm64: kprobes: fix kprobes without CONFIG_KRETPROBES

This allows CONFIG_KRETPROBES to be disabled without disabling
kprobes entirely.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/probes/kprobes.c | 2 ++
1 file changed, 2 insertions(+)

diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c
index c4452827419b..98230ae979ca 100644
--- a/arch/arm64/kernel/probes/kprobes.c
+++ b/arch/arm64/kernel/probes/kprobes.c
@@ -551,6 +551,7 @@ void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
return (void *)orig_ret_address;
}

+#ifdef CONFIG_KRETPROBES
void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
struct pt_regs *regs)
{
@@ -564,6 +565,7 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p)
{
return 0;
}
+#endif

int __init arch_init_kprobes(void)
{
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:13:50

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 12/17] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2c0238ce0551..ef76101201b2 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -72,6 +72,10 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:14:02

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 13/17] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 10 ++++++++++
2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index fdabf40a83c8..5616dc52a033 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -49,6 +49,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -73,6 +75,9 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #96]
+#endif
ret
ENDPROC(cpu_do_suspend)

@@ -89,6 +94,11 @@ ENTRY(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x0, #96]
+ /* Clear the SCS pointer from the state buffer */
+ str xzr, [x0, #96]
+#endif
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:14:20

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 14/17] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18 and SCS is enabled, restore the register
before jumping back to instrumented code. This is safe, because the
wrapper is called with preemption disabled and a separate shadow stack
is used for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..945744f16086 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,10 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Restore x18 before returning to instrumented code. */
+ mov x18, x2
+#endif
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:14:44

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 06/17] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 19 +++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 41 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 296546ffed6c..111e58ec231e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bda20282746b..fcb8c1708f9e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 7c1a40020754..7780fc4e29ac 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -11,6 +11,7 @@
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -74,6 +75,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -107,6 +113,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -135,6 +146,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -145,6 +162,7 @@ int scs_prepare(struct task_struct *tsk, int node)

task_set_scs(tsk, s);
scs_set_magic(tsk);
+ scs_account(tsk, 1);

return 0;
}
@@ -164,6 +182,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
task_set_scs(tsk, NULL);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ecc3dbad606b..fe17d69d98a7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5361,6 +5361,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5382,6 +5385,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6afc892a148a..9fe4afe670fe 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1118,6 +1118,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:14:50

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 17/17] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 37 ++++++++++++++++++++++++++
arch/arm64/include/asm/stacktrace.h | 4 +++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 28 ++++++++++++++++++++
arch/arm64/kernel/head.S | 9 +++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 39 ++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
12 files changed, 137 insertions(+)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 42867174920f..f4c94c5e8012 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -66,6 +66,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -948,6 +949,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..c50d2b0c6c5f
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void) {}
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4d9b1f48dc39..b6cf32fb4efe 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -68,6 +68,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);

DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);

+#ifdef CONFIG_SHADOW_CALL_STACK
+DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#endif
+
static inline bool on_irq_stack(unsigned long sp,
struct stack_info *info)
{
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..8c73764b9ed2 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 478491f07b4f..b3995329d9e5 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 214685760e1c..f6762b9ae1e1 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf3bd2976e57..1eff08c71403 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -172,6 +172,10 @@ alternative_cb_end

apply_ssbd 1, x22, x23

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -278,6 +282,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -383,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x20, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -400,6 +413,12 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

@@ -409,6 +428,10 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* x20 is also preserved */
+ mov x18, x20
+#endif
.endm

/* GPRs used by entry code */
@@ -1155,6 +1178,11 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
ENDPROC(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..2be977c6496f 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -424,6 +425,10 @@ __primary_switched:
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -731,6 +736,10 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // Set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS]
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 71f788cd2b18..5f0aec285848 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -507,6 +508,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
uao_thread_switch(next);
ptrauth_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..6f255072c9a9
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(SCS_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc9fe879c279..cc1938a585d2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -44,6 +44,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -357,6 +358,9 @@ void cpu_die(void)
{
unsigned int cpu = smp_processor_id();

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:15:05

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 11/17] arm64: disable function graph tracing with SCS

With CONFIG_FUNCTION_GRAPH_TRACER, function return addresses are
modified in ftrace_graph_caller and prepare_ftrace_return to redirect
control flow to ftrace_return_to_handler. This is incompatible with
SCS.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index e7b57a8a5531..42867174920f 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -148,7 +148,7 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:15:47

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 15/17] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:17:00

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 16/17] arm64: disable SCS for hypervisor code

Filter out CC_FLAGS_SCS for code that runs at a different exception
level.

Suggested-by: Steven Rostedt (VMware) <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kvm/hyp/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index ea710f674cb6..17ea3da325e9 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -28,3 +28,6 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+
+# remove the SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-01 22:38:25

by Miguel Ojeda

[permalink] [raw]
Subject: Re: [PATCH v4 05/17] add support for Clang's Shadow Call Stack (SCS)

On Fri, Nov 1, 2019 at 11:12 PM Sami Tolvanen <[email protected]> wrote:
>
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> Note that security guarantees in the kernel differ from the
> ones documented for user space. The kernel must store addresses
> of shadow stacks used by other tasks and interrupt handlers in
> memory, which means an attacker capable of reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>

Reviewed-by: Miguel Ojeda <[email protected]>

Cheers,
Miguel

2019-11-01 23:00:28

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v4 10/17] arm64: disable kretprobes with SCS

With CONFIG_KRETPROBES, function return addresses are modified to
redirect control flow to kretprobe_trampoline. This is incompatible
with SCS.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3f047afb982c..e7b57a8a5531 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -165,7 +165,7 @@ config ARM64
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_KPROBES
- select HAVE_KRETPROBES
+ select HAVE_KRETPROBES if !SHADOW_CALL_STACK
select HAVE_GENERIC_VDSO
select IOMMU_DMA if IOMMU_SUPPORT
select IRQ_DOMAIN
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-02 17:33:37

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v4 07/17] scs: add support for stack usage debugging

On Fri, Nov 01, 2019 at 03:11:40PM -0700, Sami Tolvanen wrote:
> Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
> also prints out the highest shadow stack usage per process.
>
> Signed-off-by: Sami Tolvanen <[email protected]>

Thanks for helping me find this Kconfig. :) :)

Reviewed-by: Kees Cook <[email protected]>

-Kees

> ---
> kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 39 insertions(+)
>
> diff --git a/kernel/scs.c b/kernel/scs.c
> index 7780fc4e29ac..67c43af627d1 100644
> --- a/kernel/scs.c
> +++ b/kernel/scs.c
> @@ -167,6 +167,44 @@ int scs_prepare(struct task_struct *tsk, int node)
> return 0;
> }
>
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> +static inline unsigned long scs_used(struct task_struct *tsk)
> +{
> + unsigned long *p = __scs_base(tsk);
> + unsigned long *end = scs_magic(tsk);
> + uintptr_t s = (uintptr_t)p;
> +
> + while (p < end && *p)
> + p++;
> +
> + return (uintptr_t)p - s;
> +}
> +
> +static void scs_check_usage(struct task_struct *tsk)
> +{
> + static DEFINE_SPINLOCK(lock);
> + static unsigned long highest;
> + unsigned long used = scs_used(tsk);
> +
> + if (used <= highest)
> + return;
> +
> + spin_lock(&lock);
> +
> + if (used > highest) {
> + pr_info("%s: highest shadow stack usage %lu bytes\n",
> + __func__, used);
> + highest = used;
> + }
> +
> + spin_unlock(&lock);
> +}
> +#else
> +static inline void scs_check_usage(struct task_struct *tsk)
> +{
> +}
> +#endif
> +
> bool scs_corrupted(struct task_struct *tsk)
> {
> return *scs_magic(tsk) != SCS_END_MAGIC;
> @@ -181,6 +219,7 @@ void scs_release(struct task_struct *tsk)
> return;
>
> WARN_ON(scs_corrupted(tsk));
> + scs_check_usage(tsk);
>
> scs_account(tsk, -1);
> task_set_scs(tsk, NULL);
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

--
Kees Cook

2019-11-04 11:06:20

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v4 03/17] arm64: kvm: stop treating register x18 as caller save

Hi Sami,

On 2019-11-01 23:20, Sami Tolvanen wrote:
> From: Ard Biesheuvel <[email protected]>
>
> In preparation of reserving x18, stop treating it as caller save in
> the KVM guest entry/exit code. Currently, the code assumes there is
> no need to preserve it for the host, given that it would have been
> assumed clobbered anyway by the function call to __guest_enter().
> Instead, preserve its value and restore it upon return.
>
> Link: https://patchwork.kernel.org/patch/9836891/
> Signed-off-by: Ard Biesheuvel <[email protected]>
> [Sami: updated commit message, switched from x18 to x29 for the guest
> context]
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>

If you intend for this to be merged via the arm64 tree, please add my

Reviewed-by: Marc Zyngier <[email protected]>

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2019-11-04 11:41:15

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 04/17] arm64: kernel: avoid x18 __cpu_soft_restart

On Fri, Nov 01, 2019 at 03:11:37PM -0700, Sami Tolvanen wrote:
> From: Ard Biesheuvel <[email protected]>
>
> The code in __cpu_soft_restart() uses x18 as an arbitrary temp register,
> which will shortly be disallowed. So use x8 instead.
>
> Link: https://patchwork.kernel.org/patch/9836877/
> Signed-off-by: Ard Biesheuvel <[email protected]>
> [Sami: updated commit message]
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Mark Rutland <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>

Trivial nit, but the commit title is missing "in" between x18 and
__cpu_soft_restart.

Mark.

> ---
> arch/arm64/kernel/cpu-reset.S | 4 ++--
> 1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
> index 6ea337d464c4..32c7bf858dd9 100644
> --- a/arch/arm64/kernel/cpu-reset.S
> +++ b/arch/arm64/kernel/cpu-reset.S
> @@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart)
> mov x0, #HVC_SOFT_RESTART
> hvc #0 // no return
>
> -1: mov x18, x1 // entry
> +1: mov x8, x1 // entry
> mov x0, x2 // arg0
> mov x1, x3 // arg1
> mov x2, x4 // arg2
> - br x18
> + br x8
> ENDPROC(__cpu_soft_restart)
>
> .popsection
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-04 11:52:59

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 03/17] arm64: kvm: stop treating register x18 as caller save

On Fri, Nov 01, 2019 at 03:11:36PM -0700, Sami Tolvanen wrote:
> From: Ard Biesheuvel <[email protected]>
>
> In preparation of reserving x18, stop treating it as caller save in
> the KVM guest entry/exit code. Currently, the code assumes there is
> no need to preserve it for the host, given that it would have been
> assumed clobbered anyway by the function call to __guest_enter().
> Instead, preserve its value and restore it upon return.
>
> Link: https://patchwork.kernel.org/patch/9836891/
> Signed-off-by: Ard Biesheuvel <[email protected]>
> [Sami: updated commit message, switched from x18 to x29 for the guest context]
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/kvm/hyp/entry.S | 41 +++++++++++++++++++-------------------
> 1 file changed, 20 insertions(+), 21 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
> index e5cc8d66bf53..c3c2d842c609 100644
> --- a/arch/arm64/kvm/hyp/entry.S
> +++ b/arch/arm64/kvm/hyp/entry.S
> @@ -23,6 +23,7 @@
> .pushsection .hyp.text, "ax"
>

Could we please add a note here, e.g.

/*
* We treat x18 as callee-saved as the host may use it as a platform
* register (e.g. for shadow call stack).
*/

... as that will avoid anyone trying to optimize this away in future
after reading the AAPCS.

> .macro save_callee_saved_regs ctxt
> + str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
> stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
> stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
> stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
> @@ -32,6 +33,8 @@
> .endm
>
> .macro restore_callee_saved_regs ctxt
> + // We assume \ctxt is not x18-x28

Probably worth s/assume/require/ here.

Otherwise, this looks good to me:

Reviewed-by: Mark Rutland <[email protected]>

Mark.

> + ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
> ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
> ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
> ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
> @@ -48,7 +51,7 @@ ENTRY(__guest_enter)
> // x0: vcpu
> // x1: host context
> // x2-x17: clobbered by macros
> - // x18: guest context
> + // x29: guest context
>
> // Store the host regs
> save_callee_saved_regs x1
> @@ -67,31 +70,28 @@ alternative_else_nop_endif
> ret
>
> 1:
> - add x18, x0, #VCPU_CONTEXT
> + add x29, x0, #VCPU_CONTEXT
>
> // Macro ptrauth_switch_to_guest format:
> // ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
> // The below macro to restore guest keys is not implemented in C code
> // as it may cause Pointer Authentication key signing mismatch errors
> // when this feature is enabled for kernel code.
> - ptrauth_switch_to_guest x18, x0, x1, x2
> + ptrauth_switch_to_guest x29, x0, x1, x2
>
> // Restore guest regs x0-x17
> - ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
> - ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
> - ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
> - ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
> - ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
> - ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
> - ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
> - ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
> - ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
> -
> - // Restore guest regs x19-x29, lr
> - restore_callee_saved_regs x18
> -
> - // Restore guest reg x18
> - ldr x18, [x18, #CPU_XREG_OFFSET(18)]
> + ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
> + ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
> + ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
> + ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
> + ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)]
> + ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)]
> + ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)]
> + ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)]
> + ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
> +
> + // Restore guest regs x18-x29, lr
> + restore_callee_saved_regs x29
>
> // Do not touch any register after this!
> eret
> @@ -114,7 +114,7 @@ ENTRY(__guest_exit)
> // Retrieve the guest regs x0-x1 from the stack
> ldp x2, x3, [sp], #16 // x0, x1
>
> - // Store the guest regs x0-x1 and x4-x18
> + // Store the guest regs x0-x1 and x4-x17
> stp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
> stp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
> stp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
> @@ -123,9 +123,8 @@ ENTRY(__guest_exit)
> stp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
> stp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
> stp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
> - str x18, [x1, #CPU_XREG_OFFSET(18)]
>
> - // Store the guest regs x19-x29, lr
> + // Store the guest regs x18-x29, lr
> save_callee_saved_regs x1
>
> get_host_ctxt x2, x3
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-04 12:32:45

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 05/17] add support for Clang's Shadow Call Stack (SCS)

On Fri, Nov 01, 2019 at 03:11:38PM -0700, Sami Tolvanen wrote:
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> Note that security guarantees in the kernel differ from the
> ones documented for user space. The kernel must store addresses
> of shadow stacks used by other tasks and interrupt handlers in
> memory, which means an attacker capable reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> Makefile | 6 ++
> arch/Kconfig | 33 +++++++
> include/linux/compiler-clang.h | 6 ++
> include/linux/compiler_types.h | 4 +
> include/linux/scs.h | 57 +++++++++++
> init/init_task.c | 8 ++
> kernel/Makefile | 1 +
> kernel/fork.c | 9 ++
> kernel/sched/core.c | 2 +
> kernel/sched/sched.h | 1 +
> kernel/scs.c | 169 +++++++++++++++++++++++++++++++++
> 11 files changed, 296 insertions(+)
> create mode 100644 include/linux/scs.h
> create mode 100644 kernel/scs.c
>
> diff --git a/Makefile b/Makefile
> index 79be70bf2899..e6337314f8fb 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
> KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
> endif
>
> +ifdef CONFIG_SHADOW_CALL_STACK
> +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> +export CC_FLAGS_SCS
> +endif
> +
> # arch Makefile may override CC so keep this after arch Makefile is included
> NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 5f8a5d84dbbe..5e34cbcd8d6a 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
> about 20% of all kernel functions, which increases the kernel code
> size by about 2%.
>
> +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> + bool
> + help
> + An architecture should select this if it supports Clang's Shadow
> + Call Stack, has asm/scs.h, and implements runtime support for shadow
> + stack switching.
> +
> +config SHADOW_CALL_STACK_VMAP
> + bool
> + depends on SHADOW_CALL_STACK
> + help
> + Use virtually mapped shadow call stacks. Selecting this option
> + provides better stack exhaustion protection, but increases per-thread
> + memory consumption as a full page is allocated for each shadow stack.
> +
> +config SHADOW_CALL_STACK
> + bool "Clang Shadow Call Stack"
> + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> + help
> + This option enables Clang's Shadow Call Stack, which uses a
> + shadow stack to protect function return addresses from being
> + overwritten by an attacker. More information can be found from
> + Clang's documentation:
> +
> + https://clang.llvm.org/docs/ShadowCallStack.html
> +
> + Note that security guarantees in the kernel differ from the ones
> + documented for user space. The kernel must store addresses of shadow
> + stacks used by other tasks and interrupt handlers in memory, which
> + means an attacker capable of reading and writing arbitrary memory may
> + be able to locate them and hijack control flow by modifying shadow
> + stacks that are not currently in use.
> +
> config HAVE_ARCH_WITHIN_STACK_FRAMES
> bool
> help
> diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
> index 333a6695a918..18fc4d29ef27 100644
> --- a/include/linux/compiler-clang.h
> +++ b/include/linux/compiler-clang.h
> @@ -42,3 +42,9 @@
> * compilers, like ICC.
> */
> #define barrier() __asm__ __volatile__("" : : : "memory")
> +
> +#if __has_feature(shadow_call_stack)
> +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
> +#else
> +# define __noscs
> +#endif
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index 72393a8c1a6c..be5d5be4b1ae 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -202,6 +202,10 @@ struct ftrace_likely_data {
> # define randomized_struct_fields_end
> #endif
>
> +#ifndef __noscs
> +# define __noscs
> +#endif
> +
> #ifndef asm_volatile_goto
> #define asm_volatile_goto(x...) asm goto(x)
> #endif
> diff --git a/include/linux/scs.h b/include/linux/scs.h
> new file mode 100644
> index 000000000000..bd5ef4278b91
> --- /dev/null
> +++ b/include/linux/scs.h
> @@ -0,0 +1,57 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#ifndef _LINUX_SCS_H
> +#define _LINUX_SCS_H
> +
> +#include <linux/gfp.h>
> +#include <linux/sched.h>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +
> +/*
> + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
> + * architecture) provided ~40% safety margin on stack usage while keeping
> + * memory allocation overhead reasonable.
> + */
> +#define SCS_SIZE 1024

To make it easier to reason about type promotion rules (and to avoid
accidentally masking out high bits when using this to generate a mask),
can we please make this 1024UL?
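
For example (just a sketch of the concern): with a plain int 1024, the mask
~(SCS_SIZE - 1) is first computed as a negative int and only does the right
thing because it sign-extends when converted to unsigned long, whereas

#define SCS_SIZE	1024UL

keeps the whole mask computation in unsigned long from the start.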

> +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
> +
> +/*
> + * A random number outside the kernel's virtual address space to mark the
> + * end of the shadow stack.
> + */
> +#define SCS_END_MAGIC 0xaf0194819b1635f6UL
> +
> +#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s)
> +{
> + task_scs(tsk) = s;
> +}
> +
> +extern void scs_init(void);
> +extern void scs_task_reset(struct task_struct *tsk);
> +extern int scs_prepare(struct task_struct *tsk, int node);
> +extern bool scs_corrupted(struct task_struct *tsk);
> +extern void scs_release(struct task_struct *tsk);
> +
> +#else /* CONFIG_SHADOW_CALL_STACK */
> +
> +#define task_scs(tsk) NULL
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s) {}
> +static inline void scs_init(void) {}
> +static inline void scs_task_reset(struct task_struct *tsk) {}
> +static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
> +static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
> +static inline void scs_release(struct task_struct *tsk) {}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK */
> +
> +#endif /* _LINUX_SCS_H */
> diff --git a/init/init_task.c b/init/init_task.c
> index 9e5cbe5eab7b..cbd40460e903 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -11,6 +11,7 @@
> #include <linux/mm.h>
> #include <linux/audit.h>
> #include <linux/numa.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <linux/uaccess.h>
> @@ -184,6 +185,13 @@ struct task_struct init_task
> };
> EXPORT_SYMBOL(init_task);
>
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
> + __aligned(SCS_SIZE) = {
> + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
> +};
> +#endif
> +
> /*
> * Initial thread structure. Alignment of this is handled by a special
> * linker map entry.
> diff --git a/kernel/Makefile b/kernel/Makefile
> index daad787fb795..313dbd44d576 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
> obj-$(CONFIG_IRQ_WORK) += irq_work.o
> obj-$(CONFIG_CPU_PM) += cpu_pm.o
> obj-$(CONFIG_BPF) += bpf/
> +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
>
> obj-$(CONFIG_PERF_EVENTS) += events/
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index bcdf53125210..3fa7ba64c62d 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -94,6 +94,7 @@
> #include <linux/livepatch.h>
> #include <linux/thread_info.h>
> #include <linux/stackleak.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <asm/pgalloc.h>
> @@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)
>
> void free_task(struct task_struct *tsk)
> {
> + scs_release(tsk);
> +
> #ifndef CONFIG_THREAD_INFO_IN_TASK
> /*
> * The task is finally done with both the stack and thread_info,
> @@ -834,6 +837,8 @@ void __init fork_init(void)
> NULL, free_vm_stack_cache);
> #endif
>
> + scs_init();
> +
> lockdep_init_task(&init_task);
> uprobes_init();
> }
> @@ -893,6 +898,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
> if (err)
> goto free_stack;
>
> + err = scs_prepare(tsk, node);
> + if (err)
> + goto free_stack;
> +
> #ifdef CONFIG_SECCOMP
> /*
> * We must handle setting up seccomp filters once we're under
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index dd05a378631a..e7faeb383008 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -6013,6 +6013,8 @@ void init_idle(struct task_struct *idle, int cpu)
> raw_spin_lock_irqsave(&idle->pi_lock, flags);
> raw_spin_lock(&rq->lock);
>
> + scs_task_reset(idle);

Could we please do this next to the kasan_unpoison_task_stack() call,
either just before or just after?

They're both addressing the same issue, where a previously live stack is
being reused, and in general I'd expect them to occur at the same time
(though I understand idle will be a bit different).
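
E.g. something like the below (sketch only; the exact surrounding lines in
init_idle() may differ):

	scs_task_reset(idle);
	kasan_unpoison_task_stack(idle);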

> +
> __sched_fork(0, idle);
> idle->state = TASK_RUNNING;
> idle->se.exec_start = sched_clock();
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 0db2c1b3361e..c153003a011c 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -58,6 +58,7 @@
> #include <linux/profile.h>
> #include <linux/psi.h>
> #include <linux/rcupdate_wait.h>
> +#include <linux/scs.h>
> #include <linux/security.h>
> #include <linux/stop_machine.h>
> #include <linux/suspend.h>

This include looks extraneous.

> diff --git a/kernel/scs.c b/kernel/scs.c
> new file mode 100644
> index 000000000000..7c1a40020754
> --- /dev/null
> +++ b/kernel/scs.c
> @@ -0,0 +1,169 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#include <linux/cpuhotplug.h>
> +#include <linux/mm.h>
> +#include <linux/mmzone.h>
> +#include <linux/scs.h>
> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +#include <asm/scs.h>
> +
> +static inline void *__scs_base(struct task_struct *tsk)
> +{
> + /*
> + * We allow architectures to use the shadow_call_stack field in
> + * struct thread_info to store the current shadow stack pointer
> + * during context switches.
> + *
> + * This allows the implementation to also clear the field when
> + * the task is active to avoid keeping pointers to the current
> + * task's shadow stack in memory. This can make it harder for an
> + * attacker to locate the shadow stack, but also requires us to
> + * compute the base address when needed.
> + *
> + * We assume the stack is aligned to SCS_SIZE.
> + */

How about:

/*
* To minimize the risk of exposure, architectures may clear a
* task's thread_info::shadow_call_stack while that task is
* running, and only save/restore the active shadow call stack
* pointer when the usual register may be clobbered (e.g. across
* context switches).
*
* The shadow call stack is aligned to SCS_SIZE, and grows
* upwards, so we can mask out the low bits to extract the base
* when the task is not running.
*/

... which I think makes the lifetime and constraints a bit clearer.

> + return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));

We usually use unsigned long rather than uintptr_t. Could we please use
that for consistency?

The kernel relies on sizeof(unsigned long) == sizeof(void *) tree-wide,
so that doesn't cause issues for us here.

Similarly, as suggested above, it would be easier to reason about this
knowing that SCS_SIZE is an unsigned long. While IIUC we'd get sign
extension here when it's promoted, giving the definition a UL suffix
minimizes the scope for error.

> +}
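With the above (unsigned long instead of uintptr_t, and an unsigned
long SCS_SIZE), I'd expect this to end up as something like:

	return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));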
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
> +
> +/* Keep a cache of shadow stacks */
> +#define SCS_CACHE_SIZE 2

How about:

/* Matches NR_CACHED_STACKS for VMAP_STACK */
#define NR_CACHED_SCS 2

... which explains where the number came from, and avoids confusion that
the SIZE is a byte size rather than number of elements.

> +static DEFINE_PER_CPU(void *, scs_cache[SCS_CACHE_SIZE]);
> +
> +static void *scs_alloc(int node)
> +{
> + int i;
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + void *s;
> +
> + s = this_cpu_xchg(scs_cache[i], NULL);
> + if (s) {
> + memset(s, 0, SCS_SIZE);
> + return s;
> + }
> + }
> +
> + /*
> + * We allocate a full page for the shadow stack, which should be
> + * more than we need. Check the assumption nevertheless.
> + */
> + BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
> +
> + return __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> + VMALLOC_START, VMALLOC_END,
> + GFP_SCS, PAGE_KERNEL, 0,
> + node, __builtin_return_address(0));
> +}
> +
> +static void scs_free(void *s)
> +{
> + int i;
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++)
> + if (this_cpu_cmpxchg(scs_cache[i], 0, s) == 0)
> + return;

Here we should compare to NULL rather than 0.
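I.e. roughly:

	if (this_cpu_cmpxchg(scs_cache[i], NULL, s) == NULL)
		return;

... which also makes it obvious that we're dealing with pointers rather
than integers here.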

> +
> + vfree_atomic(s);
> +}
> +
> +static int scs_cleanup(unsigned int cpu)
> +{
> + int i;
> + void **cache = per_cpu_ptr(scs_cache, cpu);
> +
> + for (i = 0; i < SCS_CACHE_SIZE; i++) {
> + vfree(cache[i]);
> + cache[i] = NULL;
> + }
> +
> + return 0;
> +}
> +
> +void __init scs_init(void)
> +{
> + cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
> + scs_cleanup);

We probably want to do something if this call fails. It looks like we'd
only leak two pages (and we'd be able to use them if/when that CPU is
brought back online). A WARN_ON() is probably fine.
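Something along these lines should be enough (sketch; for
CPUHP_BP_PREPARE_DYN the return value is a dynamically allocated state
number on success and a negative errno on failure):

	WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache",
				  NULL, scs_cleanup) < 0);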

Thanks,
Mark.

> +}
> +
> +#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static struct kmem_cache *scs_cache;
> +
> +static inline void *scs_alloc(int node)
> +{
> + return kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
> +}
> +
> +static inline void scs_free(void *s)
> +{
> + kmem_cache_free(scs_cache, s);
> +}
> +
> +void __init scs_init(void)
> +{
> + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
> + 0, NULL);
> + WARN_ON(!scs_cache);
> +}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static inline unsigned long *scs_magic(struct task_struct *tsk)
> +{
> + return (unsigned long *)(__scs_base(tsk) + SCS_SIZE) - 1;
> +}
> +
> +static inline void scs_set_magic(struct task_struct *tsk)
> +{
> + *scs_magic(tsk) = SCS_END_MAGIC;
> +}
> +
> +void scs_task_reset(struct task_struct *tsk)
> +{
> + /*
> + * Reset the shadow stack to the base address in case the task
> + * is reused.
> + */
> + task_set_scs(tsk, __scs_base(tsk));
> +}
> +
> +int scs_prepare(struct task_struct *tsk, int node)
> +{
> + void *s;
> +
> + s = scs_alloc(node);
> + if (!s)
> + return -ENOMEM;
> +
> + task_set_scs(tsk, s);
> + scs_set_magic(tsk);
> +
> + return 0;
> +}
> +
> +bool scs_corrupted(struct task_struct *tsk)
> +{
> + return *scs_magic(tsk) != SCS_END_MAGIC;
> +}
> +
> +void scs_release(struct task_struct *tsk)
> +{
> + void *s;
> +
> + s = __scs_base(tsk);
> + if (!s)
> + return;
> +
> + WARN_ON(scs_corrupted(tsk));
> +
> + task_set_scs(tsk, NULL);
> + scs_free(s);
> +}
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-04 12:44:01

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 07/17] scs: add support for stack usage debugging

On Fri, Nov 01, 2019 at 03:11:40PM -0700, Sami Tolvanen wrote:
> Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
> also prints out the highest shadow stack usage per process.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 39 insertions(+)
>
> diff --git a/kernel/scs.c b/kernel/scs.c
> index 7780fc4e29ac..67c43af627d1 100644
> --- a/kernel/scs.c
> +++ b/kernel/scs.c
> @@ -167,6 +167,44 @@ int scs_prepare(struct task_struct *tsk, int node)
> return 0;
> }
>
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> +static inline unsigned long scs_used(struct task_struct *tsk)
> +{
> + unsigned long *p = __scs_base(tsk);
> + unsigned long *end = scs_magic(tsk);
> + uintptr_t s = (uintptr_t)p;

As previously, please use unsigned long for consistency.

> +
> + while (p < end && *p)
> + p++;

I think this is the only place where we legitimately access the shadow
call stack directly. When using SCS and KASAN, are the
compiler-generated accesses to the SCS instrumented?

If not, it might make sense to make this:

while (p < end && READ_ONCE_NOCHECK(*p))

... and poison the allocation from KASAN's PoV, so that we can find
unintentional accesses more easily.

Mark.

> +
> + return (uintptr_t)p - s;
> +}
> +
> +static void scs_check_usage(struct task_struct *tsk)
> +{
> + static DEFINE_SPINLOCK(lock);
> + static unsigned long highest;
> + unsigned long used = scs_used(tsk);
> +
> + if (used <= highest)
> + return;
> +
> + spin_lock(&lock);
> +
> + if (used > highest) {
> + pr_info("%s: highest shadow stack usage %lu bytes\n",
> + __func__, used);
> + highest = used;
> + }
> +
> + spin_unlock(&lock);
> +}
> +#else
> +static inline void scs_check_usage(struct task_struct *tsk)
> +{
> +}
> +#endif
> +
> bool scs_corrupted(struct task_struct *tsk)
> {
> return *scs_magic(tsk) != SCS_END_MAGIC;
> @@ -181,6 +219,7 @@ void scs_release(struct task_struct *tsk)
> return;
>
> WARN_ON(scs_corrupted(tsk));
> + scs_check_usage(tsk);
>
> scs_account(tsk, -1);
> task_set_scs(tsk, NULL);
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-04 13:14:55

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v4 06/17] scs: add accounting

On 2019-11-01 23:21, Sami Tolvanen wrote:
> This change adds accounting for the memory allocated for shadow
> stacks.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> drivers/base/node.c | 6 ++++++
> fs/proc/meminfo.c | 4 ++++
> include/linux/mmzone.h | 3 +++
> kernel/scs.c | 19 +++++++++++++++++++
> mm/page_alloc.c | 6 ++++++
> mm/vmstat.c | 3 +++
> 6 files changed, 41 insertions(+)
>
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index 296546ffed6c..111e58ec231e 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
> "Node %d AnonPages: %8lu kB\n"
> "Node %d Shmem: %8lu kB\n"
> "Node %d KernelStack: %8lu kB\n"
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + "Node %d ShadowCallStack:%8lu kB\n"
> +#endif
> "Node %d PageTables: %8lu kB\n"
> "Node %d NFS_Unstable: %8lu kB\n"
> "Node %d Bounce: %8lu kB\n"
> @@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
> nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
> nid, K(i.sharedram),
> nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
> +#endif
> nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
> nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
> nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
> diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
> index 8c1f1bb1a5ce..49768005a79e 100644
> --- a/fs/proc/meminfo.c
> +++ b/fs/proc/meminfo.c
> @@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
> show_val_kb(m, "SUnreclaim: ", sunreclaim);
> seq_printf(m, "KernelStack: %8lu kB\n",
> global_zone_page_state(NR_KERNEL_STACK_KB));
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + seq_printf(m, "ShadowCallStack:%8lu kB\n",
> + global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
> +#endif
> show_val_kb(m, "PageTables: ",
> global_zone_page_state(NR_PAGETABLE));
>
> diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
> index bda20282746b..fcb8c1708f9e 100644
> --- a/include/linux/mmzone.h
> +++ b/include/linux/mmzone.h
> @@ -200,6 +200,9 @@ enum zone_stat_item {
> NR_MLOCK, /* mlock()ed pages found and moved off LRU */
> NR_PAGETABLE, /* used for pagetables */
> NR_KERNEL_STACK_KB, /* measured in KiB */
> +#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)

Is there any reason why you're not consistently using only one of
"#if IS_ENABLED(...)" or "#ifdef ...", but instead a mix of both?

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2019-11-04 13:23:08

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v4 13/17] arm64: preserve x18 when CPU is suspended

On 2019-11-01 23:21, Sami Tolvanen wrote:
> Don't lose the current task's shadow stack when the CPU is suspended.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Nick Desaulniers <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/include/asm/suspend.h | 2 +-
> arch/arm64/mm/proc.S | 10 ++++++++++
> 2 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
> index 8939c87c4dce..0cde2f473971 100644
> --- a/arch/arm64/include/asm/suspend.h
> +++ b/arch/arm64/include/asm/suspend.h
> @@ -2,7 +2,7 @@
> #ifndef __ASM_SUSPEND_H
> #define __ASM_SUSPEND_H
>
> -#define NR_CTX_REGS 12
> +#define NR_CTX_REGS 13
> #define NR_CALLEE_SAVED_REGS 12
>
> /*
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fdabf40a83c8..5616dc52a033 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -49,6 +49,8 @@
> * cpu_do_suspend - save CPU registers context
> *
> * x0: virtual address of context pointer
> + *
> + * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
> */
> ENTRY(cpu_do_suspend)
> mrs x2, tpidr_el0
> @@ -73,6 +75,9 @@ alternative_endif
> stp x8, x9, [x0, #48]
> stp x10, x11, [x0, #64]
> stp x12, x13, [x0, #80]
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + str x18, [x0, #96]
> +#endif

Do we need the #ifdefery here? We didn't add that to the KVM path,
and I'd feel better having a single behaviour, especially when
NR_CTX_REGS is unconditionally sized to hold 13 regs.

> ret
> ENDPROC(cpu_do_suspend)
>
> @@ -89,6 +94,11 @@ ENTRY(cpu_do_resume)
> ldp x9, x10, [x0, #48]
> ldp x11, x12, [x0, #64]
> ldp x13, x14, [x0, #80]
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + ldr x18, [x0, #96]
> + /* Clear the SCS pointer from the state buffer */
> + str xzr, [x0, #96]
> +#endif
> msr tpidr_el0, x2
> msr tpidrro_el0, x3
> msr contextidr_el1, x4

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2019-11-04 13:31:48

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v4 03/17] arm64: kvm: stop treating register x18 as caller save

On 2019-11-04 12:13, Marc Zyngier wrote:
> Hi Sami,
>
> On 2019-11-01 23:20, Sami Tolvanen wrote:
>> From: Ard Biesheuvel <[email protected]>
>>
>> In preparation of reserving x18, stop treating it as caller save in
>> the KVM guest entry/exit code. Currently, the code assumes there is
>> no need to preserve it for the host, given that it would have been
>> assumed clobbered anyway by the function call to __guest_enter().
>> Instead, preserve its value and restore it upon return.
>>
>> Link: https://patchwork.kernel.org/patch/9836891/
>> Signed-off-by: Ard Biesheuvel <[email protected]>
>> [Sami: updated commit message, switched from x18 to x29 for the
>> guest
>> context]
>> Signed-off-by: Sami Tolvanen <[email protected]>
>> Reviewed-by: Kees Cook <[email protected]>
>
> If you intend for this to be merged via the arm64 tree, please add my
>
> Reviewed-by: Marc Zyngier <[email protected]>

Erm... Muscle memory strikes again. Please ignore the above and use the
following instead:

Reviewed-by: Marc Zyngier <[email protected]>

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2019-11-04 16:46:05

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 04/17] arm64: kernel: avoid x18 __cpu_soft_restart

On Mon, Nov 4, 2019 at 3:39 AM Mark Rutland <[email protected]> wrote:
> Trivial nit, but the commit title is missing "in" between x18 and
> __cpu_soft_restart.

Oops, thanks for pointing that out. I'll fix this in v5.

Sami

2019-11-04 16:46:15

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 06/17] scs: add accounting

On Mon, Nov 4, 2019 at 5:13 AM Marc Zyngier <[email protected]> wrote:
> Is there any reason why you're not consistently using only one of
> "#if IS_ENABLED(...)" or "#ifdef ...", but instead a mix of both?

This is to match the style already used in each file. For example,
fs/proc/meminfo.c uses #ifdef for other configs in the same function,
and include/linux/mmzone.h uses #if IS_ENABLED(...).

Sami

2019-11-04 17:01:17

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v4 06/17] scs: add accounting

On 2019-11-04 17:52, Sami Tolvanen wrote:
> On Mon, Nov 4, 2019 at 5:13 AM Marc Zyngier <[email protected]> wrote:
>> Is there any reason why you're not consistently using only one of
>> "#if IS_ENABLED(...)" or "#ifdef ...", but instead a mix of both?
>
> This is to match the style already used in each file. For example,
> fs/proc/meminfo.c uses #ifdef for other configs in the same function,
> and include/linux/mmzone.h uses #if IS_ENABLED(...).

Ah, fair enough.

M.
--
Jazz is not dead. It just smells funny...

2019-11-04 17:06:11

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 10/17] arm64: disable kretprobes with SCS

On Fri, Nov 01, 2019 at 03:11:43PM -0700, Sami Tolvanen wrote:
> With CONFIG_KRETPROBES, function return addresses are modified to
> redirect control flow to kretprobe_trampoline. This is incompatible
> with SCS.

I'm a bit confused as to why that's the case -- could you please
elaborate on how this is incompatible?

IIUC kretprobes works by patching the function entry point with a BRK, so
that it can modify the LR _before_ it is saved to the stack. I don't see
how SCS affects that.

When the instrumented function returns, it'll balance its SCS state,
then "return" to kretprobe_trampoline. Since kretprobe_trampoline is
plain assembly, it doesn't have SCS, and can modify the LR live, as it
does.

So functionally, that appears to work. What am I missing?

Thanks,
Mark.

> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 3f047afb982c..e7b57a8a5531 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -165,7 +165,7 @@ config ARM64
> select HAVE_STACKPROTECTOR
> select HAVE_SYSCALL_TRACEPOINTS
> select HAVE_KPROBES
> - select HAVE_KRETPROBES
> + select HAVE_KRETPROBES if !SHADOW_CALL_STACK
> select HAVE_GENERIC_VDSO
> select IOMMU_DMA if IOMMU_SUPPORT
> select IRQ_DOMAIN
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-04 17:12:51

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 11/17] arm64: disable function graph tracing with SCS

On Fri, Nov 01, 2019 at 03:11:44PM -0700, Sami Tolvanen wrote:
> With CONFIG_FUNCTION_GRAPH_TRACER, function return addresses are
> modified in ftrace_graph_caller and prepare_ftrace_return to redirect
> control flow to ftrace_return_to_handler. This is incompatible with
> SCS.

Can you please elaborate on _how_ this is incompatible in the commit
message?

For example, it's not clear to me if you mean that's functionally
incompatible, or if you're trying to remove return-altering gadgets.

If there's a functional incompatibility, please spell that out a bit
more clearly. Likewise if this is about minimizing the set of places
that can mess with control-flow outside of usual function conventions.

Thanks,
Mark.

>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index e7b57a8a5531..42867174920f 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -148,7 +148,7 @@ config ARM64
> select HAVE_FTRACE_MCOUNT_RECORD
> select HAVE_FUNCTION_TRACER
> select HAVE_FUNCTION_ERROR_INJECTION
> - select HAVE_FUNCTION_GRAPH_TRACER
> + select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
> select HAVE_GCC_PLUGINS
> select HAVE_HW_BREAKPOINT if PERF_EVENTS
> select HAVE_IRQ_TIME_ACCOUNTING
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-04 18:26:34

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 05/17] add support for Clang's Shadow Call Stack (SCS)

On Mon, Nov 4, 2019 at 4:31 AM Mark Rutland <[email protected]> wrote:
> > +/*
> > + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
> > + * architecture) provided ~40% safety margin on stack usage while keeping
> > + * memory allocation overhead reasonable.
> > + */
> > +#define SCS_SIZE 1024
>
> To make it easier to reason about type promotion rules (and avoid that
> we accidentaly mask out high bits when using this to generate a mask),
> can we please make this 1024UL?

Sure.
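With the UL suffix the whole mask is then computed as an unsigned long,
e.g. on 64-bit:

	~(SCS_SIZE - 1) == ~(1024UL - 1) == 0xfffffffffffffc00UL

so masking a task_scs() pointer keeps the high bits without having to
think about sign extension of an int constant.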

> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -6013,6 +6013,8 @@ void init_idle(struct task_struct *idle, int cpu)
> > raw_spin_lock_irqsave(&idle->pi_lock, flags);
> > raw_spin_lock(&rq->lock);
> >
> > + scs_task_reset(idle);
>
> Could we please do this next to the kasan_unpoison_task_stack() call,
> either just before, or just after?
>
> They're both addressing the same issue where a previously live stack is
> being reused, and in general I'd expect them to occur at the same time
> (though I understand idle will be a bit different).

Good point, I'll move this.

> > --- a/kernel/sched/sched.h
> > +++ b/kernel/sched/sched.h
> > @@ -58,6 +58,7 @@
> > #include <linux/profile.h>
> > #include <linux/psi.h>
> > #include <linux/rcupdate_wait.h>
> > +#include <linux/scs.h>
> > #include <linux/security.h>
> > #include <linux/stop_machine.h>
> > #include <linux/suspend.h>
>
> This include looks extraneous.

I added this to sched.h, because most of the includes used in
kernel/sched appear to be there, but I can move this to
kernel/sched/core.c instead.

> > +static inline void *__scs_base(struct task_struct *tsk)
> > +{
> > + /*
> > + * We allow architectures to use the shadow_call_stack field in
> > + * struct thread_info to store the current shadow stack pointer
> > + * during context switches.
> > + *
> > + * This allows the implementation to also clear the field when
> > + * the task is active to avoid keeping pointers to the current
> > + * task's shadow stack in memory. This can make it harder for an
> > + * attacker to locate the shadow stack, but also requires us to
> > + * compute the base address when needed.
> > + *
> > + * We assume the stack is aligned to SCS_SIZE.
> > + */
>
> How about:
>
> /*
> * To minimize the risk of exposure, architectures may clear a
> * task's thread_info::shadow_call_stack while that task is
> * running, and only save/restore the active shadow call stack
> * pointer when the usual register may be clobbered (e.g. across
> * context switches).
> *
> * The shadow call stack is aligned to SCS_SIZE, and grows
> * upwards, so we can mask out the low bits to extract the base
> * when the task is not running.
> */
>
> ... which I think makes the lifetime and constraints a bit clearer.

Sounds good to me, thanks.

> > + return (void *)((uintptr_t)task_scs(tsk) & ~(SCS_SIZE - 1));
>
> We usually use unsigned long rather than uintptr_t. Could we please use
> that for consistency?
>
> The kernel relies on sizeof(unsigned long) == sizeof(void *) tree-wide,
> so that doesn't cause issues for us here.
>
> Similarly, as suggested above, it would be easier to reason about this
> knowing that SCS_SIZE is an unsigned long. While IIUC we'd get sign
> extension here when it's promoted, giving the definition a UL suffix
> minimizes the scope for error.

OK, I'll switch to unsigned long.

> > +/* Keep a cache of shadow stacks */
> > +#define SCS_CACHE_SIZE 2
>
> How about:
>
> /* Matches NR_CACHED_STACKS for VMAP_STACK */
> #define NR_CACHED_SCS 2
>
> ... which explains where the number came from, and avoids confusion that
> the SIZE is a byte size rather than number of elements.

Agreed, that sounds better.

> > +static void scs_free(void *s)
> > +{
> > + int i;
> > +
> > + for (i = 0; i < SCS_CACHE_SIZE; i++)
> > + if (this_cpu_cmpxchg(scs_cache[i], 0, s) == 0)
> > + return;
>
> Here we should compare to NULL rather than 0.

Ack.

> > +void __init scs_init(void)
> > +{
> > + cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
> > + scs_cleanup);
>
> We probably want to do something if this call fails. It looks like we'd
> only leak two pages (and we'd be able to use them if/when that CPU is
> brought back online). A WARN_ON() is probably fine.

fork_init() in kernel/fork.c lets this fail quietly, but adding a
WARN_ON seems fine.

I will include these changes in v5.

Sami

2019-11-04 21:38:46

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 07/17] scs: add support for stack usage debugging

On Mon, Nov 4, 2019 at 4:40 AM Mark Rutland <[email protected]> wrote:
> > +#ifdef CONFIG_DEBUG_STACK_USAGE
> > +static inline unsigned long scs_used(struct task_struct *tsk)
> > +{
> > + unsigned long *p = __scs_base(tsk);
> > + unsigned long *end = scs_magic(tsk);
> > + uintptr_t s = (uintptr_t)p;
>
> As previously, please use unsigned long for consistency.

Ack.

> > + while (p < end && *p)
> > + p++;
>
> I think this is the only place where we legitimately access the shadow
> call stack directly.

There's also scs_corrupted, which checks that the end magic is intact.

> When using SCS and KASAN, are the
> compiler-generated accesses to the SCS instrumented?
>
> If not, it might make sense to make this:
>
> while (p < end && READ_ONCE_NOCHECK(*p))
>
> ... and poison the allocation from KASAN's PoV, so that we can find
> unintentional accesses more easily.

Sure, that makes sense. I can poison the allocation for the
non-vmalloc case; I'll just need to refactor scs_set_magic to happen
before the poisoning.
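For the kmem_cache case I'm thinking of something roughly like this
(sketch; assuming kasan_poison_object_data()/kasan_unpoison_object_data()
are the right helpers for a slab-backed buffer):

	static inline void *scs_alloc(int node)
	{
		void *s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);

		if (s) {
			scs_set_magic(s);
			/* poison the rest so stray direct accesses trip KASAN */
			kasan_poison_object_data(scs_cache, s);
		}

		return s;
	}

with a matching kasan_unpoison_object_data() call in scs_free() before
the buffer goes back to the cache.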

Sami

2019-11-04 21:39:50

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 13/17] arm64: preserve x18 when CPU is suspended

On Mon, Nov 4, 2019 at 5:20 AM Marc Zyngier <[email protected]> wrote:
> > ENTRY(cpu_do_suspend)
> > mrs x2, tpidr_el0
> > @@ -73,6 +75,9 @@ alternative_endif
> > stp x8, x9, [x0, #48]
> > stp x10, x11, [x0, #64]
> > stp x12, x13, [x0, #80]
> > +#ifdef CONFIG_SHADOW_CALL_STACK
> > + str x18, [x0, #96]
> > +#endif
>
> Do we need the #ifdefery here? We didn't add that to the KVM path,
> and I'd feel better having a single behaviour, especially when
> NR_CTX_REGS is unconditionally sized to hold 13 regs.

I'm fine with dropping the ifdefs here in v5 unless someone objects to this.

Sami

2019-11-04 21:46:27

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 03/17] arm64: kvm: stop treating register x18 as caller save

On Mon, Nov 4, 2019 at 3:51 AM Mark Rutland <[email protected]> wrote:
> > --- a/arch/arm64/kvm/hyp/entry.S
> > +++ b/arch/arm64/kvm/hyp/entry.S
> > @@ -23,6 +23,7 @@
> > .pushsection .hyp.text, "ax"
> >
>
> Could we please add a note here, e.g.
>
> /*
> * We treat x18 as callee-saved as the host may use it as a platform
> * register (e.g. for shadow call stack).
> */
>
> ... as that will avoid anyone trying to optimize this away in future
> after reading the AAPCS.

Sure, that's a good idea.

> > .macro restore_callee_saved_regs ctxt
> > + // We assume \ctxt is not x18-x28
>
> Probably worth s/assume/require/ here.

Agreed, I'll change this in v5.

Sami

2019-11-04 22:01:54

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v4 13/17] arm64: preserve x18 when CPU is suspended

On Mon, Nov 4, 2019 at 1:38 PM Sami Tolvanen <[email protected]> wrote:
>
> On Mon, Nov 4, 2019 at 5:20 AM Marc Zyngier <[email protected]> wrote:
> > > ENTRY(cpu_do_suspend)
> > > mrs x2, tpidr_el0
> > > @@ -73,6 +75,9 @@ alternative_endif
> > > stp x8, x9, [x0, #48]
> > > stp x10, x11, [x0, #64]
> > > stp x12, x13, [x0, #80]
> > > +#ifdef CONFIG_SHADOW_CALL_STACK
> > > + str x18, [x0, #96]
> > > +#endif
> >
> > Do we need the #ifdefery here? We didn't add that to the KVM path,
> > and I'd feel better having a single behaviour, especially when
> > NR_CTX_REGS is unconditionally sized to hold 13 regs.
>
> I'm fine with dropping the ifdefs here in v5 unless someone objects to this.

Oh, yeah I guess it would be good to be consistent. Rather than drop
the ifdefs, would you (Marc) be ok with conditionally setting
NR_CTX_REGS based on CONFIG_SHADOW_CALL_STACK, and doing so in KVM?
(So 3 ifdefs, rather than 0)?
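E.g. something like this in <asm/suspend.h> (sketch only), with the
str/ldr #ifdefs in proc.S kept as they are:

	#ifdef CONFIG_SHADOW_CALL_STACK
	#define NR_CTX_REGS	13
	#else
	#define NR_CTX_REGS	12
	#endif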

Without any conditionals or comments, it's not clear why x18 is being
saved and restored (unless git blame survives, or a comment is added
in place of the ifdefs in v6).
--
Thanks,
~Nick Desaulniers

2019-11-04 23:46:25

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 10/17] arm64: disable kretprobes with SCS

On Mon, Nov 4, 2019 at 9:05 AM Mark Rutland <[email protected]> wrote:
> I'm a bit confused as to why that's the case -- could you please
> elaborate on how this is incompatible?
>
> IIUC kretprobes works by patching the function entry point with a BRK, so
> that it can modify the LR _before_ it is saved to the stack. I don't see
> how SCS affects that.

You're correct. While this may not be optimal for reducing attack
surface, I just tested this to confirm that there's no functional
conflict. I'll drop this and related patches from v5.

Sami

2019-11-04 23:48:08

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 11/17] arm64: disable function graph tracing with SCS

On Mon, Nov 4, 2019 at 9:11 AM Mark Rutland <[email protected]> wrote:
> Can you please elaborate on _how_ this is incompatible in the commit
> message?
>
> For example, it's not clear to me if you mean that's functionally
> incompatible, or if you're trying to remove return-altering gadgets.
>
> If there's a functional incompatibility, please spell that out a bit
> more clearly. Likewise if this is about minimizing the set of places
> that can mess with control-flow outside of usual function conventions.

Sure, I'll add a better description in v5. In this case, the return
address is modified in the kernel stack, which means the changes are
ignored with SCS.

Sami

2019-11-05 00:03:40

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 13/17] arm64: preserve x18 when CPU is suspended

On Mon, Nov 4, 2019 at 1:59 PM Nick Desaulniers <[email protected]> wrote:
>
> On Mon, Nov 4, 2019 at 1:38 PM Sami Tolvanen <[email protected]> wrote:
> >
> > On Mon, Nov 4, 2019 at 5:20 AM Marc Zyngier <[email protected]> wrote:
> > > > ENTRY(cpu_do_suspend)
> > > > mrs x2, tpidr_el0
> > > > @@ -73,6 +75,9 @@ alternative_endif
> > > > stp x8, x9, [x0, #48]
> > > > stp x10, x11, [x0, #64]
> > > > stp x12, x13, [x0, #80]
> > > > +#ifdef CONFIG_SHADOW_CALL_STACK
> > > > + str x18, [x0, #96]
> > > > +#endif
> > >
> > > Do we need the #ifdefery here? We didn't add that to the KVM path,
> > > and I'd feel better having a single behaviour, especially when
> > > NR_CTX_REGS is unconditionally sized to hold 13 regs.
> >
> > I'm fine with dropping the ifdefs here in v5 unless someone objects to this.
>
> Oh, yeah I guess it would be good to be consistent. Rather than drop
> the ifdefs, would you (Marc) be ok with conditionally setting
> NR_CTX_REGS based on CONFIG_SHADOW_CALL_STACK, and doing so in KVM?
> (So 3 ifdefs, rather than 0)?
>
> Without any conditionals or comments, it's not clear why x18 is being
> saved and restored (unless git blame survives, or a comment is added
> in place of the ifdefs in v6).

True. Clearing the sleep state buffer in cpu_do_resume is also
pointless without CONFIG_SHADOW_CALL_STACK, so if the ifdefs are
removed, some kind of an explanation is needed there.

Sami

2019-11-05 14:57:40

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v4 13/17] arm64: preserve x18 when CPU is suspended

On 2019-11-05 01:11, Sami Tolvanen wrote:
> On Mon, Nov 4, 2019 at 1:59 PM Nick Desaulniers
> <[email protected]> wrote:
>>
>> On Mon, Nov 4, 2019 at 1:38 PM Sami Tolvanen
>> <[email protected]> wrote:
>> >
>> > On Mon, Nov 4, 2019 at 5:20 AM Marc Zyngier <[email protected]>
>> wrote:
>> > > > ENTRY(cpu_do_suspend)
>> > > > mrs x2, tpidr_el0
>> > > > @@ -73,6 +75,9 @@ alternative_endif
>> > > > stp x8, x9, [x0, #48]
>> > > > stp x10, x11, [x0, #64]
>> > > > stp x12, x13, [x0, #80]
>> > > > +#ifdef CONFIG_SHADOW_CALL_STACK
>> > > > + str x18, [x0, #96]
>> > > > +#endif
>> > >
>> > > Do we need the #ifdefery here? We didn't add that to the KVM
>> path,
>> > > and I'd feel better having a single behaviour, especially when
>> > > NR_CTX_REGS is unconditionally sized to hold 13 regs.
>> >
>> > I'm fine with dropping the ifdefs here in v5 unless someone
>> objects to this.
>>
>> Oh, yeah I guess it would be good to be consistent. Rather than
>> drop
>> the ifdefs, would you (Marc) be ok with conditionally setting
>> NR_CTX_REGS based on CONFIG_SHADOW_CALL_STACK, and doing so in KVM?
>> (So 3 ifdefs, rather than 0)?
>>
>> Without any conditionals or comments, it's not clear why x18 is
>> being
>> saved and restored (unless git blame survives, or a comment is added
>> in place of the ifdefs in v6).
>
> True. Clearing the sleep state buffer in cpu_do_resume is also
> pointless without CONFIG_SHADOW_CALL_STACK, so if the ifdefs are
> removed, some kind of an explanation is needed there.

I can't imagine the overhead being noticeable, and I certainly value
minimizing the testing space. Sticking a comment there should be
enough for people hacking on this to understand that this isn't
entirely dead code.

Thanks,

M.
--
Jazz is not dead. It just smells funny...

2019-11-05 19:56:19

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 11/17] arm64: disable function graph tracing with SCS

On Mon, Nov 04, 2019 at 03:44:39PM -0800, Sami Tolvanen wrote:
> On Mon, Nov 4, 2019 at 9:11 AM Mark Rutland <[email protected]> wrote:
> > Can you please elaborate on _how_ this is incompatible in the commit
> > message?
> >
> > For example, it's not clear to me if you mean that's functionally
> > incompatible, or if you're trying to remove return-altering gadgets.
> >
> > If there's a functional incompatibility, please spell that out a bit
> > more clearly. Likewise if this is about minimizing the set of places
> > that can mess with control-flow outside of usual function conventions.
>
> Sure, I'll add a better description in v5. In this case, the return
> address is modified in the kernel stack, which means the changes are
> ignored with SCS.

Ok, that makes sense to me. I'd suggest something like:

| The graph tracer hooks returns by modifying frame records on the
| (regular) stack, but with SCS the return address is taken from the
| shadow stack, and the value in the frame record has no effect. As we
| don't currently have a mechanism to determine the corresponding slot
| on the shadow stack (and to pass this through the ftrace
| infrastructure), for now let's disable the graph tracer when SCS is
| enabled.

... as I suspect with some rework of the trampoline and common ftrace
code we'd be able to correctly manipulate the shadow stack for this.
Similarly, if clang gained -fpatchable-function-entry, we'd get that for
free.

Thanks,
Mark.

2019-11-05 19:56:20

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 07/17] scs: add support for stack usage debugging

On Mon, Nov 04, 2019 at 01:35:28PM -0800, Sami Tolvanen wrote:
> On Mon, Nov 4, 2019 at 4:40 AM Mark Rutland <[email protected]> wrote:
> > > +#ifdef CONFIG_DEBUG_STACK_USAGE
> > > +static inline unsigned long scs_used(struct task_struct *tsk)
> > > +{
> > > + unsigned long *p = __scs_base(tsk);
> > > + unsigned long *end = scs_magic(tsk);
> > > + uintptr_t s = (uintptr_t)p;
> >
> > As previously, please use unsigned long for consistency.
>
> Ack.
>
> > > + while (p < end && *p)
> > > + p++;
> >
> > I think this is the only place where we legitimately access the shadow
> > call stack directly.
>
> There's also scs_corrupted, which checks that the end magic is intact.

Ah, true. I missed that.

> > When using SCS and KASAN, are the
> > compiler-generated accesses to the SCS instrumented?
> >
> > If not, it might make sense to make this:
> >
> > while (p < end && READ_ONCE_NOCHECK(*p))
> >
> > ... and poison the allocation from KASAN's PoV, so that we can find
> > unintentional accesses more easily.
>
> Sure, that makes sense. I can poison the allocation for the
> non-vmalloc case, I'll just need to refactor scs_set_magic to happen
> before the poisoning.

Sounds good!

Mark.

2019-11-05 20:03:25

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v4 11/17] arm64: disable function graph tracing with SCS

On Tue, Nov 5, 2019 at 11:55 AM Mark Rutland <[email protected]> wrote:
> Similarly, if clang gained -fpatchable-function-entry, we'd get that for
> free.

Filed: https://bugs.llvm.org/show_bug.cgi?id=43912
--
Thanks,
~Nick Desaulniers

2019-11-05 20:51:57

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v4 10/17] arm64: disable kretprobes with SCS

On Mon, Nov 04, 2019 at 03:42:09PM -0800, Sami Tolvanen wrote:
> On Mon, Nov 4, 2019 at 9:05 AM Mark Rutland <[email protected]> wrote:
> > I'm a bit confused as to why that's the case -- could you please
> > elaborate on how this is incompatible?
> >
> > IIUC kretprobes works by patching the function entry point with a BRK, so
> > that it can modify the LR _before_ it is saved to the stack. I don't see
> > how SCS affects that.
>
> You're correct. While this may not be optimal for reducing attack
> surface, I just tested this to confirm that there's no functional
> conflict. I'll drop this and related patches from v5.

Great; thanks for confirming!

Mark.

2019-11-05 22:09:06

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v4 11/17] arm64: disable function graph tracing with SCS

On Tue, Nov 5, 2019 at 11:55 AM Mark Rutland <[email protected]> wrote:
> On Mon, Nov 04, 2019 at 03:44:39PM -0800, Sami Tolvanen wrote:
> > Sure, I'll add a better description in v5. In this case, the return
> > address is modified in the kernel stack, which means the changes are
> > ignored with SCS.
>
> Ok, that makes sense to me. I'd suggest something like:
>
> | The graph tracer hooks returns by modifying frame records on the
> | (regular) stack, but with SCS the return address is taken from the
> | shadow stack, and the value in the frame record has no effect. As we
> | don't currently have a mechanism to determine the corresponding slot
> | on the shadow stack (and to pass this through the ftrace
> | infrastructure), for now let's disable the graph tracer when SCS is
> | enabled.
>
> ... as I suspect with some rework of the trampoline and common ftrace
> code we'd be able to correctly manipulate the shadow stack for this.
> > Similarly, if clang gained -fpatchable-function-entry, we'd get that for
> free.

That sounds good to me. Thanks, Mark.

Sami

2019-11-05 23:57:19

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 00/14] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by inactive tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer. Because of this, the series includes
patches from Ard to remove x18 usage from assembly code.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
in the kernel stack to alter control flow, such as function
graph tracing, although it may be possible to later change these
features to modify the shadow stack instead. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved set_set_magic before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions


Ard Biesheuvel (3):
arm64/lib: copy_page: avoid x18 register in assembler code
arm64: kvm: stop treating register x18 as caller save
arm64: kernel: avoid x18 in __cpu_soft_restart

Sami Tolvanen (11):
arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
arm64: disable function graph tracing with SCS
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack

Makefile | 6 +
arch/Kconfig | 33 ++++
arch/arm64/Kconfig | 7 +-
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/scs.h | 37 ++++
arch/arm64/include/asm/stacktrace.h | 4 +
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/cpu-reset.S | 4 +-
arch/arm64/kernel/efi-rt-wrapper.S | 7 +-
arch/arm64/kernel/entry.S | 28 +++
arch/arm64/kernel/head.S | 9 +
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 39 +++++
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/kvm/hyp/Makefile | 3 +
arch/arm64/kvm/hyp/entry.S | 45 ++---
arch/arm64/lib/copy_page.S | 38 ++---
arch/arm64/mm/proc.S | 77 +++++----
drivers/base/node.c | 6 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 57 +++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 +
kernel/sched/core.c | 2 +
kernel/scs.c | 246 +++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
36 files changed, 638 insertions(+), 77 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: 26bc672134241a080a83b2ab9aa8abede8d30e1c
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:57:25

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 01/14] arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings

idmap_kpti_install_ng_mappings uses x18 as a temporary register, which
will result in a conflict when x18 is reserved. Use x16 and x17 instead
where needed.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/mm/proc.S | 63 ++++++++++++++++++++++----------------------
1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a1e0592d1fbc..fdabf40a83c8 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -250,15 +250,15 @@ ENTRY(idmap_kpti_install_ng_mappings)
/* We're the boot CPU. Wait for the others to catch up */
sevl
1: wfe
- ldaxr w18, [flag_ptr]
- eor w18, w18, num_cpus
- cbnz w18, 1b
+ ldaxr w17, [flag_ptr]
+ eor w17, w17, num_cpus
+ cbnz w17, 1b

/* We need to walk swapper, so turn off the MMU. */
pre_disable_mmu_workaround
- mrs x18, sctlr_el1
- bic x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ bic x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/* Everybody is enjoying the idmap, so we can rewrite swapper. */
@@ -281,9 +281,9 @@ skip_pgd:
isb

/* We're done: fire up the MMU again */
- mrs x18, sctlr_el1
- orr x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ orr x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/*
@@ -353,46 +353,47 @@ skip_pte:
b.ne do_pte
b next_pmd

+ .unreq cpu
+ .unreq num_cpus
+ .unreq swapper_pa
+ .unreq cur_pgdp
+ .unreq end_pgdp
+ .unreq pgd
+ .unreq cur_pudp
+ .unreq end_pudp
+ .unreq pud
+ .unreq cur_pmdp
+ .unreq end_pmdp
+ .unreq pmd
+ .unreq cur_ptep
+ .unreq end_ptep
+ .unreq pte
+
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
/* Uninstall swapper before surgery begins */
- __idmap_cpu_set_reserved_ttbr1 x18, x17
+ __idmap_cpu_set_reserved_ttbr1 x16, x17

/* Increment the flag to let the boot CPU we're ready */
-1: ldxr w18, [flag_ptr]
- add w18, w18, #1
- stxr w17, w18, [flag_ptr]
+1: ldxr w16, [flag_ptr]
+ add w16, w16, #1
+ stxr w17, w16, [flag_ptr]
cbnz w17, 1b

/* Wait for the boot CPU to finish messing around with swapper */
sevl
1: wfe
- ldxr w18, [flag_ptr]
- cbnz w18, 1b
+ ldxr w16, [flag_ptr]
+ cbnz w16, 1b

/* All done, act like nothing happened */
- offset_ttbr1 swapper_ttb, x18
+ offset_ttbr1 swapper_ttb, x16
msr ttbr1_el1, swapper_ttb
isb
ret

- .unreq cpu
- .unreq num_cpus
- .unreq swapper_pa
.unreq swapper_ttb
.unreq flag_ptr
- .unreq cur_pgdp
- .unreq end_pgdp
- .unreq pgd
- .unreq cur_pudp
- .unreq end_pudp
- .unreq pud
- .unreq cur_pmdp
- .unreq end_pmdp
- .unreq pmd
- .unreq cur_ptep
- .unreq end_ptep
- .unreq pte
ENDPROC(idmap_kpti_install_ng_mappings)
.popsection
#endif
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:57:32

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 02/14] arm64/lib: copy_page: avoid x18 register in assembler code

From: Ard Biesheuvel <[email protected]>

Register x18 will no longer be used as a caller save register in the
future, so stop using it in the copy_page() code.

Link: https://patchwork.kernel.org/patch/9836869/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: changed the offset and bias to be explicit]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/lib/copy_page.S | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index bbb8562396af..290dd3c5266c 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -34,45 +34,45 @@ alternative_else_nop_endif
ldp x14, x15, [x1, #96]
ldp x16, x17, [x1, #112]

- mov x18, #(PAGE_SIZE - 128)
+ add x0, x0, #256
add x1, x1, #128
1:
- subs x18, x18, #128
+ tst x0, #(PAGE_SIZE - 1)

alternative_if ARM64_HAS_NO_HW_PREFETCH
prfm pldl1strm, [x1, #384]
alternative_else_nop_endif

- stnp x2, x3, [x0]
+ stnp x2, x3, [x0, #-256]
ldp x2, x3, [x1]
- stnp x4, x5, [x0, #16]
+ stnp x4, x5, [x0, #16 - 256]
ldp x4, x5, [x1, #16]
- stnp x6, x7, [x0, #32]
+ stnp x6, x7, [x0, #32 - 256]
ldp x6, x7, [x1, #32]
- stnp x8, x9, [x0, #48]
+ stnp x8, x9, [x0, #48 - 256]
ldp x8, x9, [x1, #48]
- stnp x10, x11, [x0, #64]
+ stnp x10, x11, [x0, #64 - 256]
ldp x10, x11, [x1, #64]
- stnp x12, x13, [x0, #80]
+ stnp x12, x13, [x0, #80 - 256]
ldp x12, x13, [x1, #80]
- stnp x14, x15, [x0, #96]
+ stnp x14, x15, [x0, #96 - 256]
ldp x14, x15, [x1, #96]
- stnp x16, x17, [x0, #112]
+ stnp x16, x17, [x0, #112 - 256]
ldp x16, x17, [x1, #112]

add x0, x0, #128
add x1, x1, #128

- b.gt 1b
+ b.ne 1b

- stnp x2, x3, [x0]
- stnp x4, x5, [x0, #16]
- stnp x6, x7, [x0, #32]
- stnp x8, x9, [x0, #48]
- stnp x10, x11, [x0, #64]
- stnp x12, x13, [x0, #80]
- stnp x14, x15, [x0, #96]
- stnp x16, x17, [x0, #112]
+ stnp x2, x3, [x0, #-256]
+ stnp x4, x5, [x0, #16 - 256]
+ stnp x6, x7, [x0, #32 - 256]
+ stnp x8, x9, [x0, #48 - 256]
+ stnp x10, x11, [x0, #64 - 256]
+ stnp x12, x13, [x0, #80 - 256]
+ stnp x14, x15, [x0, #96 - 256]
+ stnp x16, x17, [x0, #112 - 256]

ret
ENDPROC(copy_page)
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:57:38

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 04/14] arm64: kernel: avoid x18 in __cpu_soft_restart

From: Ard Biesheuvel <[email protected]>

The code in __cpu_soft_restart() uses x18 as an arbitrary temp register,
which will shortly be disallowed. So use x8 instead.

Link: https://patchwork.kernel.org/patch/9836877/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: updated commit message]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/cpu-reset.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6ea337d464c4..32c7bf858dd9 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart)
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return

-1: mov x18, x1 // entry
+1: mov x8, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
- br x18
+ br x8
ENDPROC(__cpu_soft_restart)

.popsection
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:57:46

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 05/14] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 33 ++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 57 ++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
10 files changed, 313 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index b37d0e8fc61d..7f3a4c5c7dcc 100644
--- a/Makefile
+++ b/Makefile
@@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 5f8a5d84dbbe..5e34cbcd8d6a 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK_VMAP
+ bool
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found from
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable of reading and writing arbitrary memory may
+ be able to locate them and hijack control flow by modifying shadow
+ stacks that are not currently in use.
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..c5572fd770b0
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index 9e5cbe5eab7b..cbd40460e903 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index daad787fb795..313dbd44d576 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index 55af6931c6ec..6c4266019935 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/livepatch.h>
#include <linux/thread_info.h>
#include <linux/stackleak.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -451,6 +452,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -834,6 +837,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -893,6 +898,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dd05a378631a..6769e27052bf 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6018,6 +6019,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..e3234a4b92ec
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize the risk of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline unsigned long *scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(void *s)
+{
+ *scs_magic(s) = SCS_END_MAGIC;
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+ void *s;
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ goto out;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+
+out:
+ if (s)
+ scs_set_magic(s);
+ /* TODO: poison for KASAN, unpoison in scs_free */
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_SCS; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup));
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ scs_set_magic(s);
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static inline void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:57:51

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 06/14] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 296546ffed6c..111e58ec231e 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index bda20282746b..fcb8c1708f9e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index e3234a4b92ec..4f5774b6f27d 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -12,6 +12,7 @@
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -89,6 +90,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -135,6 +141,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -153,6 +164,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -162,6 +179,8 @@ int scs_prepare(struct task_struct *tsk, int node)
return -ENOMEM;

task_set_scs(tsk, s);
+ scs_account(tsk, 1);
+
return 0;
}

@@ -182,6 +201,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
task_set_scs(tsk, NULL);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index ecc3dbad606b..fe17d69d98a7 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5361,6 +5361,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5382,6 +5385,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 6afc892a148a..9fe4afe670fe 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1118,6 +1118,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:58:00

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 08/14] arm64: disable function graph tracing with SCS

The graph tracer hooks returns by modifying frame records on the
(regular) stack, but with SCS the return address is taken from the
shadow stack, and the value in the frame record has no effect. As we
don't currently have a mechanism to determine the corresponding slot
on the shadow stack (and to pass this through the ftrace
infrastructure), for now let's disable the graph tracer when SCS is
enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 3f047afb982c..8cda176dad9a 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -148,7 +148,7 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:58:04

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 09/14] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 2c0238ce0551..ef76101201b2 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -72,6 +72,10 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:58:04

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 10/14] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index fdabf40a83c8..5c8219c55948 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -49,6 +49,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -73,6 +75,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
ENDPROC(cpu_do_suspend)

@@ -89,6 +96,13 @@ ENTRY(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:58:14

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 11/14] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18 and SCS is enabled, restore the register
before jumping back to instrumented code. This is safe, because the
wrapper is called with preemption disabled and a separate shadow stack
is used for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..945744f16086 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,10 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Restore x18 before returning to instrumented code. */
+ mov x18, x2
+#endif
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:58:47

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 14/14] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 37 ++++++++++++++++++++++++++
arch/arm64/include/asm/stacktrace.h | 4 +++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 28 ++++++++++++++++++++
arch/arm64/kernel/head.S | 9 +++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 39 ++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
12 files changed, 137 insertions(+)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 8cda176dad9a..76e32d01d759 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -66,6 +66,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if GCC_VERSION >= 50000 || CC_IS_CLANG
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -948,6 +949,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..c50d2b0c6c5f
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void) {}
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 4d9b1f48dc39..b6cf32fb4efe 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -68,6 +68,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);

DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);

+#ifdef CONFIG_SHADOW_CALL_STACK
+DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#endif
+
static inline bool on_irq_stack(unsigned long sp,
struct stack_info *info)
{
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..8c73764b9ed2 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 478491f07b4f..b3995329d9e5 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 214685760e1c..f6762b9ae1e1 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index cf3bd2976e57..1eff08c71403 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -172,6 +172,10 @@ alternative_cb_end

apply_ssbd 1, x22, x23

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -278,6 +282,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -383,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x20, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -400,6 +413,12 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

@@ -409,6 +428,10 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* x20 is also preserved */
+ mov x18, x20
+#endif
.endm

/* GPRs used by entry code */
@@ -1155,6 +1178,11 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
ENDPROC(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..ca561de903d4 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -424,6 +425,10 @@ __primary_switched:
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -731,6 +736,10 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 71f788cd2b18..5f0aec285848 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -507,6 +508,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
uao_thread_switch(next);
ptrauth_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..6f255072c9a9
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(SCS_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ SCS_GFP, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index dc9fe879c279..cc1938a585d2 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -44,6 +44,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -357,6 +358,9 @@ void cpu_die(void)
{
unsigned int cpu = smp_processor_id();

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:59:03

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 07/14] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
also prints out the highest shadow stack usage per process.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 4f5774b6f27d..a47fae33efdc 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
unsigned long *magic = scs_magic(__scs_base(tsk));
@@ -200,6 +238,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:59:26

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 03/14] arm64: kvm: stop treating register x18 as caller save

From: Ard Biesheuvel <[email protected]>

In preparation of reserving x18, stop treating it as caller save in
the KVM guest entry/exit code. Currently, the code assumes there is
no need to preserve it for the host, given that it would have been
assumed clobbered anyway by the function call to __guest_enter().
Instead, preserve its value and restore it upon return.

Link: https://patchwork.kernel.org/patch/9836891/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: updated commit message, switched from x18 to x29 for the guest context]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Marc Zyngier <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/kvm/hyp/entry.S | 45 ++++++++++++++++++++------------------
1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e5cc8d66bf53..0c6832ec52b1 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -22,7 +22,12 @@
.text
.pushsection .hyp.text, "ax"

+/*
+ * We treat x18 as callee-saved as the host may use it as a platform
+ * register (e.g. for shadow call stack).
+ */
.macro save_callee_saved_regs ctxt
+ str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -32,6 +37,8 @@
.endm

.macro restore_callee_saved_regs ctxt
+ // We require \ctxt is not x18-x28
+ ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -48,7 +55,7 @@ ENTRY(__guest_enter)
// x0: vcpu
// x1: host context
// x2-x17: clobbered by macros
- // x18: guest context
+ // x29: guest context

// Store the host regs
save_callee_saved_regs x1
@@ -67,31 +74,28 @@ alternative_else_nop_endif
ret

1:
- add x18, x0, #VCPU_CONTEXT
+ add x29, x0, #VCPU_CONTEXT

// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
// as it may cause Pointer Authentication key signing mismatch errors
// when this feature is enabled for kernel code.
- ptrauth_switch_to_guest x18, x0, x1, x2
+ ptrauth_switch_to_guest x29, x0, x1, x2

// Restore guest regs x0-x17
- ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
- ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
- ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
- ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
- ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
- ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
- ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
- ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
- ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
-
- // Restore guest regs x19-x29, lr
- restore_callee_saved_regs x18
-
- // Restore guest reg x18
- ldr x18, [x18, #CPU_XREG_OFFSET(18)]
+ ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
+ ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
+ ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
+ ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
+ ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)]
+ ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)]
+ ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)]
+ ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)]
+ ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
+
+ // Restore guest regs x18-x29, lr
+ restore_callee_saved_regs x29

// Do not touch any register after this!
eret
@@ -114,7 +118,7 @@ ENTRY(__guest_exit)
// Retrieve the guest regs x0-x1 from the stack
ldp x2, x3, [sp], #16 // x0, x1

- // Store the guest regs x0-x1 and x4-x18
+ // Store the guest regs x0-x1 and x4-x17
stp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
stp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
@@ -123,9 +127,8 @@ ENTRY(__guest_exit)
stp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
- str x18, [x1, #CPU_XREG_OFFSET(18)]

- // Store the guest regs x19-x29, lr
+ // Store the guest regs x18-x29, lr
save_callee_saved_regs x1

get_host_ctxt x2, x3
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:59:46

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 12/14] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-05 23:59:47

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v5 13/14] arm64: disable SCS for hypervisor code

Filter out CC_FLAGS_SCS for code that runs at a different exception
level.

Suggested-by: Steven Rostedt (VMware) <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kvm/hyp/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index ea710f674cb6..17ea3da325e9 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -28,3 +28,6 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+
+# remove the SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
--
2.24.0.rc1.363.gb1bccd3e3d-goog

2019-11-06 04:49:09

by Miguel Ojeda

[permalink] [raw]
Subject: Re: [PATCH v5 11/14] arm64: efi: restore x18 if it was corrupted

On Wed, Nov 6, 2019 at 12:56 AM Sami Tolvanen <[email protected]> wrote:
>
> If we detect a corrupted x18 and SCS is enabled, restore the register
> before jumping back to instrumented code. This is safe, because the
> wrapper is called with preemption disabled and a separate shadow stack
> is used for interrupt handling.

In case you do v6: I think putting the explanation about why this is
safe in the existing comment would be best given it is justifying a
subtlety of the code rather than the change itself. Ard?

Cheers,
Miguel

2019-11-06 20:43:54

by Nick Desaulniers

[permalink] [raw]
Subject: Re: [PATCH v5 10/14] arm64: preserve x18 when CPU is suspended

On Tue, Nov 5, 2019 at 3:56 PM Sami Tolvanen <[email protected]> wrote:
>
> Don't lose the current task's shadow stack when the CPU is suspended.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Nick Desaulniers <[email protected]>

Re-LGTM

> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/include/asm/suspend.h | 2 +-
> arch/arm64/mm/proc.S | 14 ++++++++++++++
> 2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
> index 8939c87c4dce..0cde2f473971 100644
> --- a/arch/arm64/include/asm/suspend.h
> +++ b/arch/arm64/include/asm/suspend.h
> @@ -2,7 +2,7 @@
> #ifndef __ASM_SUSPEND_H
> #define __ASM_SUSPEND_H
>
> -#define NR_CTX_REGS 12
> +#define NR_CTX_REGS 13
> #define NR_CALLEE_SAVED_REGS 12
>
> /*
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fdabf40a83c8..5c8219c55948 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -49,6 +49,8 @@
> * cpu_do_suspend - save CPU registers context
> *
> * x0: virtual address of context pointer
> + *
> + * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
> */
> ENTRY(cpu_do_suspend)
> mrs x2, tpidr_el0
> @@ -73,6 +75,11 @@ alternative_endif
> stp x8, x9, [x0, #48]
> stp x10, x11, [x0, #64]
> stp x12, x13, [x0, #80]
> + /*
> + * Save x18 as it may be used as a platform register, e.g. by shadow
> + * call stack.
> + */
> + str x18, [x0, #96]
> ret
> ENDPROC(cpu_do_suspend)
>
> @@ -89,6 +96,13 @@ ENTRY(cpu_do_resume)
> ldp x9, x10, [x0, #48]
> ldp x11, x12, [x0, #64]
> ldp x13, x14, [x0, #80]
> + /*
> + * Restore x18, as it may be used as a platform register, and clear
> + * the buffer to minimize the risk of exposure when used for shadow
> + * call stack.
> + */
> + ldr x18, [x0, #96]
> + str xzr, [x0, #96]
> msr tpidr_el0, x2
> msr tpidrro_el0, x3
> msr contextidr_el1, x4
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>


--
Thanks,
~Nick Desaulniers

2019-11-07 10:54:26

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v5 11/14] arm64: efi: restore x18 if it was corrupted

On Wed, 6 Nov 2019 at 05:46, Miguel Ojeda
<[email protected]> wrote:
>
> On Wed, Nov 6, 2019 at 12:56 AM Sami Tolvanen <[email protected]> wrote:
> >
> > If we detect a corrupted x18 and SCS is enabled, restore the register
> > before jumping back to instrumented code. This is safe, because the
> > wrapper is called with preemption disabled and a separate shadow stack
> > is used for interrupt handling.
>
> In case you do v6: I think putting the explanation about why this is
> safe in the existing comment would be best given it is justifying a
> subtlety of the code rather than the change itself. Ard?
>

Agreed, but only if you have to respin for other reasons.

2019-11-07 16:29:53

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v5 11/14] arm64: efi: restore x18 if it was corrupted

On Thu, Nov 7, 2019 at 2:51 AM Ard Biesheuvel <[email protected]> wrote:
>
> On Wed, 6 Nov 2019 at 05:46, Miguel Ojeda
> <[email protected]> wrote:
> >
> > On Wed, Nov 6, 2019 at 12:56 AM Sami Tolvanen <[email protected]> wrote:
> > >
> > > If we detect a corrupted x18 and SCS is enabled, restore the register
> > > before jumping back to instrumented code. This is safe, because the
> > > wrapper is called with preemption disabled and a separate shadow stack
> > > is used for interrupt handling.
> >
> > In case you do v6: I think putting the explanation about why this is
> > safe in the existing comment would be best given it is justifying a
> > subtlety of the code rather than the change itself. Ard?
> >
>
> Agreed, but only if you have to respin for other reasons.

Sure, sounds good to me. I'll update the comment if other changes are needed.

Sami

2019-11-12 23:47:10

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v5 00/14] add support for Clang's Shadow Call Stack

On Tue, Nov 05, 2019 at 03:55:54PM -0800, Sami Tolvanen wrote:
> This patch series adds support for Clang's Shadow Call Stack
> (SCS) mitigation, which uses a separately allocated shadow stack
> to protect against return address overwrites. More information

Will, Catalin, Mark,

What's the next step here? I *think* all the comments have been
addressed. Is it possible to land this via the arm tree for v5.5?

Thanks!

--
Kees Cook

2019-11-13 12:27:20

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v5 00/14] add support for Clang's Shadow Call Stack

On Tue, Nov 12, 2019 at 03:44:42PM -0800, Kees Cook wrote:
> On Tue, Nov 05, 2019 at 03:55:54PM -0800, Sami Tolvanen wrote:
> > This patch series adds support for Clang's Shadow Call Stack
> > (SCS) mitigation, which uses a separately allocated shadow stack
> > to protect against return address overwrites. More information
>
> Will, Catalin, Mark,
>
> What's the next step here? I *think* all the comments have been
> addressed. Is it possible to land this via the arm tree for v5.5?

I was planning to queue this for 5.6, given that I'd really like it to
spend some quality time in linux-next.

Will

2019-11-13 18:34:53

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v5 00/14] add support for Clang's Shadow Call Stack

On Wed, Nov 13, 2019 at 12:03:38PM +0000, Will Deacon wrote:
> On Tue, Nov 12, 2019 at 03:44:42PM -0800, Kees Cook wrote:
> > On Tue, Nov 05, 2019 at 03:55:54PM -0800, Sami Tolvanen wrote:
> > > This patch series adds support for Clang's Shadow Call Stack
> > > (SCS) mitigation, which uses a separately allocated shadow stack
> > > to protect against return address overwrites. More information
> >
> > Will, Catalin, Mark,
> >
> > What's the next step here? I *think* all the comments have been
> > addressed. Is it possible to land this via the arm tree for v5.5?
>
> I was planning to queue this for 5.6, given that I'd really like it to
> spend some quality time in linux-next.

Sounds fine to me; I just wanted to have an idea what to expect. :)
Thanks!

--
Kees Cook

2019-11-13 20:28:11

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v4 08/17] kprobes: fix compilation without CONFIG_KRETPROBES

On Fri, 1 Nov 2019 15:11:41 -0700
Sami Tolvanen <[email protected]> wrote:

> kprobe_on_func_entry and arch_kprobe_on_func_entry need to be available
> even if CONFIG_KRETPROBES is not selected.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Acked-by: Masami Hiramatsu <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>

Acked-by: Steven Rostedt (VMware) <[email protected]>

-- Steve

> ---
> kernel/kprobes.c | 38 +++++++++++++++++++-------------------
> 1 file changed, 19 insertions(+), 19 deletions(-)
>
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 53534aa258a6..b5e20a4669b8 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -1829,6 +1829,25 @@ unsigned long __weak arch_deref_entry_point(void *entry)
> return (unsigned long)entry;
> }
>
> +bool __weak arch_kprobe_on_func_entry(unsigned long offset)
> +{
> + return !offset;
> +}
> +
> +bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
> +{
> + kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
> +
> + if (IS_ERR(kp_addr))
> + return false;
> +
> + if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
> + !arch_kprobe_on_func_entry(offset))
> + return false;
> +
> + return true;
> +}
> +
> #ifdef CONFIG_KRETPROBES
> /*
> * This kprobe pre_handler is registered with every kretprobe. When probe
> @@ -1885,25 +1904,6 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
> }
> NOKPROBE_SYMBOL(pre_handler_kretprobe);
>
> -bool __weak arch_kprobe_on_func_entry(unsigned long offset)
> -{
> - return !offset;
> -}
> -
> -bool kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset)
> -{
> - kprobe_opcode_t *kp_addr = _kprobe_addr(addr, sym, offset);
> -
> - if (IS_ERR(kp_addr))
> - return false;
> -
> - if (!kallsyms_lookup_size_offset((unsigned long)kp_addr, NULL, &offset) ||
> - !arch_kprobe_on_func_entry(offset))
> - return false;
> -
> - return true;
> -}
> -
> int register_kretprobe(struct kretprobe *rp)
> {
> int ret = 0;

2019-11-15 14:20:11

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v5 08/14] arm64: disable function graph tracing with SCS

On Tue, Nov 05, 2019 at 03:56:02PM -0800, Sami Tolvanen wrote:
> The graph tracer hooks returns by modifying frame records on the
> (regular) stack, but with SCS the return address is taken from the
> shadow stack, and the value in the frame record has no effect. As we
> don't currently have a mechanism to determine the corresponding slot
> on the shadow stack (and to pass this through the ftrace
> infrastructure), for now let's disable the graph tracer when SCS is
> enabled.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>

Reviewed-by: Mark Rutland <[email protected]>

Mark.

> ---
> arch/arm64/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index 3f047afb982c..8cda176dad9a 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -148,7 +148,7 @@ config ARM64
> select HAVE_FTRACE_MCOUNT_RECORD
> select HAVE_FUNCTION_TRACER
> select HAVE_FUNCTION_ERROR_INJECTION
> - select HAVE_FUNCTION_GRAPH_TRACER
> + select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
> select HAVE_GCC_PLUGINS
> select HAVE_HW_BREAKPOINT if PERF_EVENTS
> select HAVE_IRQ_TIME_ACCOUNTING
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-15 14:20:12

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v5 00/14] add support for Clang's Shadow Call Stack

On Tue, Nov 12, 2019 at 03:44:42PM -0800, Kees Cook wrote:
> On Tue, Nov 05, 2019 at 03:55:54PM -0800, Sami Tolvanen wrote:
> > This patch series adds support for Clang's Shadow Call Stack
> > (SCS) mitigation, which uses a separately allocated shadow stack
> > to protect against return address overwrites. More information
>
> Will, Catalin, Mark,
>
> What's the next step here? I *think* all the comments have been
> addressed.

I'm hoping to look over the remaining bits in the next week or so, and
to throw my test boxes at this shortly.

Thanks,
Mark.

2019-11-15 14:28:47

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v5 10/14] arm64: preserve x18 when CPU is suspended

On Tue, Nov 05, 2019 at 03:56:04PM -0800, Sami Tolvanen wrote:
> Don't lose the current task's shadow stack when the CPU is suspended.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Nick Desaulniers <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/include/asm/suspend.h | 2 +-
> arch/arm64/mm/proc.S | 14 ++++++++++++++
> 2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
> index 8939c87c4dce..0cde2f473971 100644
> --- a/arch/arm64/include/asm/suspend.h
> +++ b/arch/arm64/include/asm/suspend.h
> @@ -2,7 +2,7 @@
> #ifndef __ASM_SUSPEND_H
> #define __ASM_SUSPEND_H
>
> -#define NR_CTX_REGS 12
> +#define NR_CTX_REGS 13

For a moment I thought this might impact the alignment of the array, but
I see cpu_suspend_ctx is force-aligned to 16 bytes anyway, and since
commit cabe1c81ea5be983 the only impact would be a performance thing.
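
For readers following along, here is a rough sketch of the layout being
discussed. It is an approximation of struct cpu_suspend_ctx from
<asm/suspend.h>, not a quote of it, and the offsets are inferred from the
asm quoted below:

        /* u64 comes from <linux/types.h>; NR_CTX_REGS becomes 13 with this patch */
        struct cpu_suspend_ctx {
                u64 ctx_regs[NR_CTX_REGS];  /* x18 ends up in ctx_regs[12], i.e. [x0, #96] */
                u64 sp;
        } __aligned(16);                    /* force-aligned, as noted above */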

Reviewed-by: Mark Rutland <[email protected]>

Mark.

> #define NR_CALLEE_SAVED_REGS 12
>
> /*
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fdabf40a83c8..5c8219c55948 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -49,6 +49,8 @@
> * cpu_do_suspend - save CPU registers context
> *
> * x0: virtual address of context pointer
> + *
> + * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
> */
> ENTRY(cpu_do_suspend)
> mrs x2, tpidr_el0
> @@ -73,6 +75,11 @@ alternative_endif
> stp x8, x9, [x0, #48]
> stp x10, x11, [x0, #64]
> stp x12, x13, [x0, #80]
> + /*
> + * Save x18 as it may be used as a platform register, e.g. by shadow
> + * call stack.
> + */
> + str x18, [x0, #96]
> ret
> ENDPROC(cpu_do_suspend)
>
> @@ -89,6 +96,13 @@ ENTRY(cpu_do_resume)
> ldp x9, x10, [x0, #48]
> ldp x11, x12, [x0, #64]
> ldp x13, x14, [x0, #80]
> + /*
> + * Restore x18, as it may be used as a platform register, and clear
> + * the buffer to minimize the risk of exposure when used for shadow
> + * call stack.
> + */
> + ldr x18, [x0, #96]
> + str xzr, [x0, #96]
> msr tpidr_el0, x2
> msr tpidrro_el0, x3
> msr contextidr_el1, x4
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-15 14:46:05

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v5 12/14] arm64: vdso: disable Shadow Call Stack

On Tue, Nov 05, 2019 at 03:56:06PM -0800, Sami Tolvanen wrote:
> Shadow stacks are only available in the kernel, so disable SCS
> instrumentation for the vDSO.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Nick Desaulniers <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>

I gave this a spin, looked at objdump, and found everything in the vDSO
was a leaf function. I hacked the code around a bit to force a function
call, and I see that just uses x29 and x30 as expected, with nothing
touching x18.
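
To make the leaf/non-leaf distinction concrete, here is a minimal,
hypothetical C sketch (not code from the vDSO or from this series). With
-fsanitize=shadow-call-stack, only functions that spill the return address,
in practice non-leaf functions, are instrumented to push/pop x30 via x18,
so an all-leaf vDSO would not have touched x18 even before removing the flag:

        /* leaf: no further calls, x30 is never spilled, no SCS instrumentation
         * (noinline only keeps the example honest by forcing a real call below) */
        static __attribute__((noinline)) int add_one(int x)
        {
                return x + 1;
        }

        /* non-leaf: calling add_one() forces x30 to be saved, so with SCS
         * enabled the compiler would also emit roughly str x30, [x18], #8 on
         * entry and ldr x30, [x18, #-8]! before the final ret */
        int twice(int x)
        {
                return add_one(x) + add_one(x);
        }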

Reviewed-by: Mark Rutland <[email protected]>

Mark.

> ---
> arch/arm64/kernel/vdso/Makefile | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index dd2514bb1511..a87a4f11724e 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING
>
> VDSO_LDFLAGS := -Bsymbolic
>
> -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
> +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
> KBUILD_CFLAGS += $(DISABLE_LTO)
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-15 14:47:59

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v5 13/14] arm64: disable SCS for hypervisor code

On Tue, Nov 05, 2019 at 03:56:07PM -0800, Sami Tolvanen wrote:
> Filter out CC_FLAGS_SCS for code that runs at a different exception
> level.
>
> Suggested-by: Steven Rostedt (VMware) <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>

Looks sound, and objdump confirms that the hyp objects are using x29's
frame record, without using x18.

Reviewed-by: Mark Rutland <[email protected]>

Mark.

> ---
> arch/arm64/kvm/hyp/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> index ea710f674cb6..17ea3da325e9 100644
> --- a/arch/arm64/kvm/hyp/Makefile
> +++ b/arch/arm64/kvm/hyp/Makefile
> @@ -28,3 +28,6 @@ GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> KCOV_INSTRUMENT := n
> +
> +# remove the SCS flags from all objects in this directory
> +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
> --
> 2.24.0.rc1.363.gb1bccd3e3d-goog
>

2019-11-15 15:25:20

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v5 14/14] arm64: implement Shadow Call Stack

On Tue, Nov 05, 2019 at 03:56:08PM -0800, Sami Tolvanen wrote:
> This change implements shadow stack switching, initial SCS set-up,
> and interrupt shadow stacks for arm64.

Each CPU also has an overflow stack, and two SDEI stacks, which should
presumably be given their own SCS. SDEI is effectively a software-NMI,
so it should almost certainly have the same treatment as IRQ.
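
(To make "the same treatment" concrete, a purely hypothetical sketch with
made-up names, mirroring the per-CPU irq_shadow_call_stack_ptr handling in
this patch; the actual allocation would follow scs_init_irq():)

        /* one shadow stack per SDEI stack, i.e. per CPU for both the
         * normal-priority and the critical SDEI handler */
        DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr);
        DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr);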

> +static __always_inline void scs_save(struct task_struct *tsk)
> +{
> + void *s;
> +
> + asm volatile("mov %0, x18" : "=r" (s));
> + task_set_scs(tsk, s);
> +}

An alternative would be to follow <asm/stack_pointer.h>, and have:

register unsigned long *current_scs_pointer asm ("x18");

static __always_inline void scs_save(struct task_struct *tsk)
{
task_set_scs(tsk, current_scs_pointer);
}

... which would avoid the need for a temporary register where this is
used.

However, given we only use this in cpu_die(), having this as-is should
be fine. Maybe the asm volatile is preferable if we use this elsewhere,
so that we know we have a consistent snapshot that the compiler can't
reload, etc.

[...]

> @@ -409,6 +428,10 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
> */
> .macro irq_stack_exit
> mov sp, x19
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + /* x20 is also preserved */
> + mov x18, x20
> +#endif
> .endm

Can we please fold this comment into the one above, and have:

/*
* The callee-saved regs (x19-x29) should be preserved between
* irq_stack_entry and irq_stack_exit.
*/
.macro irq_stack_exit
mov sp, x19
#ifdef CONFIG_SHADOW_CALL_STACK
mov x18, x20
#endif
.endm

Thanks,
Mark.

2019-11-15 15:41:39

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v5 05/14] add support for Clang's Shadow Call Stack (SCS)

On Tue, Nov 05, 2019 at 03:55:59PM -0800, Sami Tolvanen wrote:
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> Note that security guarantees in the kernel differ from the
> ones documented for user space. The kernel must store addresses
> of shadow stacks used by other tasks and interrupt handlers in
> memory, which means an attacker capable of reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Miguel Ojeda <[email protected]>
> ---
> Makefile | 6 ++
> arch/Kconfig | 33 ++++++
> include/linux/compiler-clang.h | 6 ++
> include/linux/compiler_types.h | 4 +
> include/linux/scs.h | 57 ++++++++++
> init/init_task.c | 8 ++
> kernel/Makefile | 1 +
> kernel/fork.c | 9 ++
> kernel/sched/core.c | 2 +
> kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
> 10 files changed, 313 insertions(+)
> create mode 100644 include/linux/scs.h
> create mode 100644 kernel/scs.c
>
> diff --git a/Makefile b/Makefile
> index b37d0e8fc61d..7f3a4c5c7dcc 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
> KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
> endif
>
> +ifdef CONFIG_SHADOW_CALL_STACK
> +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> +export CC_FLAGS_SCS
> +endif
> +
> # arch Makefile may override CC so keep this after arch Makefile is included
> NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 5f8a5d84dbbe..5e34cbcd8d6a 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -521,6 +521,39 @@ config STACKPROTECTOR_STRONG
> about 20% of all kernel functions, which increases the kernel code
> size by about 2%.
>
> +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> + bool
> + help
> + An architecture should select this if it supports Clang's Shadow
> + Call Stack, has asm/scs.h, and implements runtime support for shadow
> + stack switching.
> +
> +config SHADOW_CALL_STACK_VMAP
> + bool
> + depends on SHADOW_CALL_STACK
> + help
> + Use virtually mapped shadow call stacks. Selecting this option
> + provides better stack exhaustion protection, but increases per-thread
> + memory consumption as a full page is allocated for each shadow stack.

The bool needs some display text to make it selectable.

This should probably be below SHADOW_CALL_STACK so that when it shows up
in menuconfig it's where you'd expect it to be.

I locally hacked that in, but when building defconfig +
SHADOW_CALL_STACK + SHADOW_CALL_STACK_VMAP, the build explodes as below:

| [mark@lakrids:~/src/linux]% usellvm 9.0.0 usekorg 8.1.0 make ARCH=arm64 CROSS_COMPILE=aarch64-linux- CC=clang -j56 -s
| arch/arm64/kernel/scs.c:28:7: error: use of undeclared identifier 'VMALLOC_START'
| VMALLOC_START, VMALLOC_END,
| ^
| arch/arm64/kernel/scs.c:28:22: error: use of undeclared identifier 'VMALLOC_END'
| VMALLOC_START, VMALLOC_END,
| ^
| arch/arm64/kernel/scs.c:29:7: error: use of undeclared identifier 'SCS_GFP'
| SCS_GFP, PAGE_KERNEL,
| ^
| arch/arm64/kernel/scs.c:29:16: error: use of undeclared identifier 'PAGE_KERNEL'
| SCS_GFP, PAGE_KERNEL,
| ^
| 4 errors generated.
| scripts/Makefile.build:265: recipe for target 'arch/arm64/kernel/scs.o' failed
| make[2]: *** [arch/arm64/kernel/scs.o] Error 1
| scripts/Makefile.build:509: recipe for target 'arch/arm64/kernel' failed
| make[1]: *** [arch/arm64/kernel] Error 2
| Makefile:1655: recipe for target 'arch/arm64' failed
| make: *** [arch/arm64] Error 2
| make: *** Waiting for unfinished jobs....

Other than that, this largely looks good to me!

Thanks,
Mark.

2019-11-15 18:37:29

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v5 05/14] add support for Clang's Shadow Call Stack (SCS)

On Fri, Nov 15, 2019 at 7:37 AM Mark Rutland <[email protected]> wrote:
> > +config SHADOW_CALL_STACK_VMAP
> > + bool
> > + depends on SHADOW_CALL_STACK
> > + help
> > + Use virtually mapped shadow call stacks. Selecting this option
> > + provides better stack exhaustion protection, but increases per-thread
> > + memory consumption as a full page is allocated for each shadow stack.
>
> The bool needs some display text to make it selectable.
>
> This should probably be below SHADOW_CALL_STACK so that when it shows up
> in menuconfig it's where you'd expect it to be.
>
> I locally hacked that in, but when building defconfig +
> SHADOW_CALL_STACK + SHADOW_CALL_STACK_VMAP, the build explodes as below:

Ugh, thanks for pointing this out. I'll fix this in v6.

Sami

2019-11-15 20:21:01

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v5 14/14] arm64: implement Shadow Call Stack

On Fri, Nov 15, 2019 at 7:20 AM Mark Rutland <[email protected]> wrote:
>
> On Tue, Nov 05, 2019 at 03:56:08PM -0800, Sami Tolvanen wrote:
> > This change implements shadow stack switching, initial SCS set-up,
> > and interrupt shadow stacks for arm64.
>
> Each CPU also has an overflow stack, and two SDEI stacks, which should
> presumably be given their own SCS. SDEI is effectively a software-NMI,
> so it should almost certainly have the same treatment as IRQ.

Makes sense. I'll take a look at adding shadow stacks for the SDEI handler.

> Can we please fold this comment into the one above, and have:
>
> /*
> * The callee-saved regs (x19-x29) should be preserved between
> * irq_stack_entry and irq_stack_exit.
> */
> .macro irq_stack_exit
> mov sp, x19
> #ifdef CONFIG_SHADOW_CALL_STACK
> mov x18, x20
> #endif
> .endm

Sure, I'll change this in the next version.

Sami

2019-11-18 23:17:38

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v5 14/14] arm64: implement Shadow Call Stack

On Fri, Nov 15, 2019 at 12:19:20PM -0800, Sami Tolvanen wrote:
> On Fri, Nov 15, 2019 at 7:20 AM Mark Rutland <[email protected]> wrote:
> >
> > On Tue, Nov 05, 2019 at 03:56:08PM -0800, Sami Tolvanen wrote:
> > > This change implements shadow stack switching, initial SCS set-up,
> > > and interrupt shadow stacks for arm64.
> >
> > Each CPU also has an overflow stack, and two SDEI stacks, which should
> > presumably be given their own SCS. SDEI is effectively a software-NMI,
> > so it should almost certainly have the same treatment as IRQ.
>
> Makes sense. I'll take a look at adding shadow stacks for the SDEI handler.

Mark, I wrote a preliminary patch for adding SDEI shadow stacks, but
it turns out I don't really have a way to test the SDEI code. Does the approach
below look sane to you?

Sami


---
arch/arm64/include/asm/scs.h | 2 +
arch/arm64/include/asm/stacktrace.h | 4 --
arch/arm64/kernel/entry.S | 14 +++-
arch/arm64/kernel/scs.c | 106 +++++++++++++++++++++++-----
arch/arm64/kernel/sdei.c | 7 ++
5 files changed, 112 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index c50d2b0c6c5f..8e327e14bc15 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,6 +9,7 @@
#ifdef CONFIG_SHADOW_CALL_STACK

extern void scs_init_irq(void);
+extern int scs_init_sdei(void);

static __always_inline void scs_save(struct task_struct *tsk)
{
@@ -27,6 +28,7 @@ static inline void scs_overflow_check(struct task_struct *tsk)
#else /* CONFIG_SHADOW_CALL_STACK */

static inline void scs_init_irq(void) {}
+static inline int scs_init_sdei(void) { return 0; }
static inline void scs_save(struct task_struct *tsk) {}
static inline void scs_overflow_check(struct task_struct *tsk) {}

diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index b6cf32fb4efe..4d9b1f48dc39 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -68,10 +68,6 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk);

DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);

-#ifdef CONFIG_SHADOW_CALL_STACK
-DECLARE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
-#endif
-
static inline bool on_irq_stack(unsigned long sp,
struct stack_info *info)
{
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 5a02b61fc3e6..ac9dfb3da440 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1309,13 +1309,16 @@ ENTRY(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1325,6 +1328,15 @@ ENTRY(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
+ b 4f
+3: ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index 9a1305a6eb5b..dddb7c56518b 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -10,31 +10,105 @@
#include <asm/pgtable.h>
#include <asm/scs.h>

-DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#define DECLARE_SCS(name) \
+ DECLARE_PER_CPU(unsigned long *, name ## _ptr); \
+ DECLARE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

-#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
-DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
- __aligned(SCS_SIZE);
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr)
+#else
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr); \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+ __aligned(SCS_SIZE)
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+DECLARE_SCS(irq_shadow_call_stack);
+DECLARE_SCS(sdei_shadow_call_stack_normal);
+DECLARE_SCS(sdei_shadow_call_stack_critical);
+
+DEFINE_SCS(irq_shadow_call_stack);
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
#endif

+static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ if (!p)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = p;
+
+ return 0;
+}
+
+static void scs_free_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p = per_cpu(*ptr, cpu);
+
+ if (p) {
+ per_cpu(*ptr, cpu) = NULL;
+ vfree(p);
+ }
+}
+
+static void scs_free_sdei(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ scs_free_percpu(&sdei_shadow_call_stack_normal_ptr, cpu);
+ scs_free_percpu(&sdei_shadow_call_stack_critical_ptr, cpu);
+ }
+}
+
void scs_init_irq(void)
{
int cpu;

for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
- unsigned long *p;
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP))
+ WARN_ON(scs_alloc_percpu(&irq_shadow_call_stack_ptr,
+ cpu));
+ else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+ }
+}

- p = __vmalloc_node_range(SCS_SIZE, SCS_SIZE,
- VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL,
- 0, cpu_to_node(cpu),
- __builtin_return_address(0));
+int scs_init_sdei(void)
+{
+ int cpu;

- per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
-#else
- per_cpu(irq_shadow_call_stack_ptr, cpu) =
- per_cpu(irq_shadow_call_stack, cpu);
-#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP)) {
+ if (scs_alloc_percpu(
+ &sdei_shadow_call_stack_normal_ptr, cpu) ||
+ scs_alloc_percpu(
+ &sdei_shadow_call_stack_critical_ptr, cpu)) {
+ scs_free_sdei();
+ return -ENOMEM;
+ }
+ } else {
+ per_cpu(sdei_shadow_call_stack_normal_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_normal, cpu);
+ per_cpu(sdei_shadow_call_stack_critical_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_critical, cpu);
+ }
}
+
+ return 0;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index ea94cf8f9dc6..3e85017a9c8b 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -12,6 +12,7 @@
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
@@ -161,6 +162,12 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}

+ if (scs_init_sdei()) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ free_sdei_stacks();
+ return 0;
+ }
+
sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0

2019-12-06 22:14:39

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 00/15] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by inactive tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer. Because of this, the series includes
patches from Ard to remove x18 usage from assembly code.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
in the kernel stack to alter control flow, such as function
graph tracing, although it may be possible to later change these
features to modify the shadow stack instead. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.
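
(A purely illustrative aside: a minimal C model of the mechanism
described above, not the compiler-generated code itself. The real
instrumentation is emitted by Clang and keeps the shadow stack
pointer in a reserved register, x18 on arm64, rather than in a
variable; all names in this sketch are made up.)

#include <stdint.h>

/* Stand-ins for the per-task shadow stack and the reserved register. */
static uintptr_t shadow_stack[128];
static uintptr_t *scs_sp = shadow_stack;

/* What the injected prologue conceptually does: push the return address. */
static inline void scs_push(uintptr_t retaddr)
{
	*scs_sp++ = retaddr;
}

/*
 * What the injected epilogue conceptually does: reload the return address
 * from the shadow stack, so overwriting the copy kept in the normal stack
 * frame does not redirect control flow.
 */
static inline uintptr_t scs_pop(void)
{
	return *--scs_sp;
}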

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v6:
- Updated comment in the EFI RT wrapper to include the
explanation from the commit message.
- Fixed the SHADOW_CALL_STACK_VMAP config option and the
compilation errors in scs_init_irq()
- Updated the comment in entry.S to Mark's suggestion
- Fixed the WARN_ON in scs_init() to trip only when the return
value for cpuhp_setup_state() is < 0.
- Removed ifdefs from the code in arch/arm64/kernel/scs.c and
added separate shadow stacks for the SDEI handler

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved scs_set_magic before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Ard Biesheuvel (3):
arm64/lib: copy_page: avoid x18 register in assembler code
arm64: kvm: stop treating register x18 as caller save
arm64: kernel: avoid x18 in __cpu_soft_restart

Sami Tolvanen (12):
arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
arm64: disable function graph tracing with SCS
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack
arm64: scs: add shadow stacks for SDEI

Makefile | 6 +
arch/Kconfig | 34 ++++
arch/arm64/Kconfig | 7 +-
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/scs.h | 39 +++++
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/cpu-reset.S | 4 +-
arch/arm64/kernel/efi-rt-wrapper.S | 11 +-
arch/arm64/kernel/entry.S | 45 ++++-
arch/arm64/kernel/head.S | 9 +
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 114 +++++++++++++
arch/arm64/kernel/sdei.c | 7 +
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/kvm/hyp/Makefile | 3 +
arch/arm64/kvm/hyp/entry.S | 45 ++---
arch/arm64/lib/copy_page.S | 38 ++---
arch/arm64/mm/proc.S | 77 +++++----
drivers/base/node.c | 6 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 57 +++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 +
kernel/sched/core.c | 2 +
kernel/scs.c | 246 +++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
36 files changed, 737 insertions(+), 80 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: 3cf2890f29ab6fe491361761df558ef9191cb468
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:14:44

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 02/15] arm64/lib: copy_page: avoid x18 register in assembler code

From: Ard Biesheuvel <[email protected]>

Register x18 will no longer be used as a caller save register in the
future, so stop using it in the copy_page() code.

Link: https://patchwork.kernel.org/patch/9836869/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: changed the offset and bias to be explicit]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/lib/copy_page.S | 38 +++++++++++++++++++-------------------
1 file changed, 19 insertions(+), 19 deletions(-)

diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S
index bbb8562396af..290dd3c5266c 100644
--- a/arch/arm64/lib/copy_page.S
+++ b/arch/arm64/lib/copy_page.S
@@ -34,45 +34,45 @@ alternative_else_nop_endif
ldp x14, x15, [x1, #96]
ldp x16, x17, [x1, #112]

- mov x18, #(PAGE_SIZE - 128)
+ add x0, x0, #256
add x1, x1, #128
1:
- subs x18, x18, #128
+ tst x0, #(PAGE_SIZE - 1)

alternative_if ARM64_HAS_NO_HW_PREFETCH
prfm pldl1strm, [x1, #384]
alternative_else_nop_endif

- stnp x2, x3, [x0]
+ stnp x2, x3, [x0, #-256]
ldp x2, x3, [x1]
- stnp x4, x5, [x0, #16]
+ stnp x4, x5, [x0, #16 - 256]
ldp x4, x5, [x1, #16]
- stnp x6, x7, [x0, #32]
+ stnp x6, x7, [x0, #32 - 256]
ldp x6, x7, [x1, #32]
- stnp x8, x9, [x0, #48]
+ stnp x8, x9, [x0, #48 - 256]
ldp x8, x9, [x1, #48]
- stnp x10, x11, [x0, #64]
+ stnp x10, x11, [x0, #64 - 256]
ldp x10, x11, [x1, #64]
- stnp x12, x13, [x0, #80]
+ stnp x12, x13, [x0, #80 - 256]
ldp x12, x13, [x1, #80]
- stnp x14, x15, [x0, #96]
+ stnp x14, x15, [x0, #96 - 256]
ldp x14, x15, [x1, #96]
- stnp x16, x17, [x0, #112]
+ stnp x16, x17, [x0, #112 - 256]
ldp x16, x17, [x1, #112]

add x0, x0, #128
add x1, x1, #128

- b.gt 1b
+ b.ne 1b

- stnp x2, x3, [x0]
- stnp x4, x5, [x0, #16]
- stnp x6, x7, [x0, #32]
- stnp x8, x9, [x0, #48]
- stnp x10, x11, [x0, #64]
- stnp x12, x13, [x0, #80]
- stnp x14, x15, [x0, #96]
- stnp x16, x17, [x0, #112]
+ stnp x2, x3, [x0, #-256]
+ stnp x4, x5, [x0, #16 - 256]
+ stnp x6, x7, [x0, #32 - 256]
+ stnp x8, x9, [x0, #48 - 256]
+ stnp x10, x11, [x0, #64 - 256]
+ stnp x12, x13, [x0, #80 - 256]
+ stnp x14, x15, [x0, #96 - 256]
+ stnp x16, x17, [x0, #112 - 256]

ret
ENDPROC(copy_page)
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:14:45

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 01/15] arm64: mm: avoid x18 in idmap_kpti_install_ng_mappings

idmap_kpti_install_ng_mappings uses x18 as a temporary register, which
will result in a conflict when x18 is reserved. Use x16 and x17 instead
where needed.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/mm/proc.S | 63 ++++++++++++++++++++++----------------------
1 file changed, 32 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index a1e0592d1fbc..fdabf40a83c8 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -250,15 +250,15 @@ ENTRY(idmap_kpti_install_ng_mappings)
/* We're the boot CPU. Wait for the others to catch up */
sevl
1: wfe
- ldaxr w18, [flag_ptr]
- eor w18, w18, num_cpus
- cbnz w18, 1b
+ ldaxr w17, [flag_ptr]
+ eor w17, w17, num_cpus
+ cbnz w17, 1b

/* We need to walk swapper, so turn off the MMU. */
pre_disable_mmu_workaround
- mrs x18, sctlr_el1
- bic x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ bic x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/* Everybody is enjoying the idmap, so we can rewrite swapper. */
@@ -281,9 +281,9 @@ skip_pgd:
isb

/* We're done: fire up the MMU again */
- mrs x18, sctlr_el1
- orr x18, x18, #SCTLR_ELx_M
- msr sctlr_el1, x18
+ mrs x17, sctlr_el1
+ orr x17, x17, #SCTLR_ELx_M
+ msr sctlr_el1, x17
isb

/*
@@ -353,46 +353,47 @@ skip_pte:
b.ne do_pte
b next_pmd

+ .unreq cpu
+ .unreq num_cpus
+ .unreq swapper_pa
+ .unreq cur_pgdp
+ .unreq end_pgdp
+ .unreq pgd
+ .unreq cur_pudp
+ .unreq end_pudp
+ .unreq pud
+ .unreq cur_pmdp
+ .unreq end_pmdp
+ .unreq pmd
+ .unreq cur_ptep
+ .unreq end_ptep
+ .unreq pte
+
/* Secondary CPUs end up here */
__idmap_kpti_secondary:
/* Uninstall swapper before surgery begins */
- __idmap_cpu_set_reserved_ttbr1 x18, x17
+ __idmap_cpu_set_reserved_ttbr1 x16, x17

/* Increment the flag to let the boot CPU we're ready */
-1: ldxr w18, [flag_ptr]
- add w18, w18, #1
- stxr w17, w18, [flag_ptr]
+1: ldxr w16, [flag_ptr]
+ add w16, w16, #1
+ stxr w17, w16, [flag_ptr]
cbnz w17, 1b

/* Wait for the boot CPU to finish messing around with swapper */
sevl
1: wfe
- ldxr w18, [flag_ptr]
- cbnz w18, 1b
+ ldxr w16, [flag_ptr]
+ cbnz w16, 1b

/* All done, act like nothing happened */
- offset_ttbr1 swapper_ttb, x18
+ offset_ttbr1 swapper_ttb, x16
msr ttbr1_el1, swapper_ttb
isb
ret

- .unreq cpu
- .unreq num_cpus
- .unreq swapper_pa
.unreq swapper_ttb
.unreq flag_ptr
- .unreq cur_pgdp
- .unreq end_pgdp
- .unreq pgd
- .unreq cur_pudp
- .unreq end_pudp
- .unreq pud
- .unreq cur_pmdp
- .unreq end_pmdp
- .unreq pmd
- .unreq cur_ptep
- .unreq end_ptep
- .unreq pte
ENDPROC(idmap_kpti_install_ng_mappings)
.popsection
#endif
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:14:50

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 03/15] arm64: kvm: stop treating register x18 as caller save

From: Ard Biesheuvel <[email protected]>

In preparation of reserving x18, stop treating it as caller save in
the KVM guest entry/exit code. Currently, the code assumes there is
no need to preserve it for the host, given that it would have been
assumed clobbered anyway by the function call to __guest_enter().
Instead, preserve its value and restore it upon return.

Link: https://patchwork.kernel.org/patch/9836891/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: updated commit message, switched from x18 to x29 for the guest context]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Marc Zyngier <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/kvm/hyp/entry.S | 45 ++++++++++++++++++++------------------
1 file changed, 24 insertions(+), 21 deletions(-)

diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S
index e5cc8d66bf53..0c6832ec52b1 100644
--- a/arch/arm64/kvm/hyp/entry.S
+++ b/arch/arm64/kvm/hyp/entry.S
@@ -22,7 +22,12 @@
.text
.pushsection .hyp.text, "ax"

+/*
+ * We treat x18 as callee-saved as the host may use it as a platform
+ * register (e.g. for shadow call stack).
+ */
.macro save_callee_saved_regs ctxt
+ str x18, [\ctxt, #CPU_XREG_OFFSET(18)]
stp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
stp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
stp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -32,6 +37,8 @@
.endm

.macro restore_callee_saved_regs ctxt
+ // We require \ctxt is not x18-x28
+ ldr x18, [\ctxt, #CPU_XREG_OFFSET(18)]
ldp x19, x20, [\ctxt, #CPU_XREG_OFFSET(19)]
ldp x21, x22, [\ctxt, #CPU_XREG_OFFSET(21)]
ldp x23, x24, [\ctxt, #CPU_XREG_OFFSET(23)]
@@ -48,7 +55,7 @@ ENTRY(__guest_enter)
// x0: vcpu
// x1: host context
// x2-x17: clobbered by macros
- // x18: guest context
+ // x29: guest context

// Store the host regs
save_callee_saved_regs x1
@@ -67,31 +74,28 @@ alternative_else_nop_endif
ret

1:
- add x18, x0, #VCPU_CONTEXT
+ add x29, x0, #VCPU_CONTEXT

// Macro ptrauth_switch_to_guest format:
// ptrauth_switch_to_guest(guest cxt, tmp1, tmp2, tmp3)
// The below macro to restore guest keys is not implemented in C code
// as it may cause Pointer Authentication key signing mismatch errors
// when this feature is enabled for kernel code.
- ptrauth_switch_to_guest x18, x0, x1, x2
+ ptrauth_switch_to_guest x29, x0, x1, x2

// Restore guest regs x0-x17
- ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)]
- ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)]
- ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)]
- ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)]
- ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)]
- ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)]
- ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)]
- ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)]
- ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)]
-
- // Restore guest regs x19-x29, lr
- restore_callee_saved_regs x18
-
- // Restore guest reg x18
- ldr x18, [x18, #CPU_XREG_OFFSET(18)]
+ ldp x0, x1, [x29, #CPU_XREG_OFFSET(0)]
+ ldp x2, x3, [x29, #CPU_XREG_OFFSET(2)]
+ ldp x4, x5, [x29, #CPU_XREG_OFFSET(4)]
+ ldp x6, x7, [x29, #CPU_XREG_OFFSET(6)]
+ ldp x8, x9, [x29, #CPU_XREG_OFFSET(8)]
+ ldp x10, x11, [x29, #CPU_XREG_OFFSET(10)]
+ ldp x12, x13, [x29, #CPU_XREG_OFFSET(12)]
+ ldp x14, x15, [x29, #CPU_XREG_OFFSET(14)]
+ ldp x16, x17, [x29, #CPU_XREG_OFFSET(16)]
+
+ // Restore guest regs x18-x29, lr
+ restore_callee_saved_regs x29

// Do not touch any register after this!
eret
@@ -114,7 +118,7 @@ ENTRY(__guest_exit)
// Retrieve the guest regs x0-x1 from the stack
ldp x2, x3, [sp], #16 // x0, x1

- // Store the guest regs x0-x1 and x4-x18
+ // Store the guest regs x0-x1 and x4-x17
stp x2, x3, [x1, #CPU_XREG_OFFSET(0)]
stp x4, x5, [x1, #CPU_XREG_OFFSET(4)]
stp x6, x7, [x1, #CPU_XREG_OFFSET(6)]
@@ -123,9 +127,8 @@ ENTRY(__guest_exit)
stp x12, x13, [x1, #CPU_XREG_OFFSET(12)]
stp x14, x15, [x1, #CPU_XREG_OFFSET(14)]
stp x16, x17, [x1, #CPU_XREG_OFFSET(16)]
- str x18, [x1, #CPU_XREG_OFFSET(18)]

- // Store the guest regs x19-x29, lr
+ // Store the guest regs x18-x29, lr
save_callee_saved_regs x1

get_host_ctxt x2, x3
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:14:56

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 04/15] arm64: kernel: avoid x18 in __cpu_soft_restart

From: Ard Biesheuvel <[email protected]>

The code in __cpu_soft_restart() uses x18 as an arbitrary temp register,
which will shortly be disallowed. So use x8 instead.

Link: https://patchwork.kernel.org/patch/9836877/
Signed-off-by: Ard Biesheuvel <[email protected]>
[Sami: updated commit message]
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/cpu-reset.S | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S
index 6ea337d464c4..32c7bf858dd9 100644
--- a/arch/arm64/kernel/cpu-reset.S
+++ b/arch/arm64/kernel/cpu-reset.S
@@ -42,11 +42,11 @@ ENTRY(__cpu_soft_restart)
mov x0, #HVC_SOFT_RESTART
hvc #0 // no return

-1: mov x18, x1 // entry
+1: mov x8, x1 // entry
mov x0, x2 // arg0
mov x1, x3 // arg1
mov x2, x4 // arg2
- br x18
+ br x8
ENDPROC(__cpu_soft_restart)

.popsection
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:15:07

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 05/15] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 34 ++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 57 ++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
10 files changed, 314 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 999a197d67d2..1b76c1ea2a02 100644
--- a/Makefile
+++ b/Makefile
@@ -845,6 +845,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 48b5e103bdb0..1b16aa9a3fe5 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -521,6 +521,40 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found from
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable of reading and writing arbitrary memory may
+ be able to locate them and hijack control flow by modifying shadow
+ stacks that are not currently in use.
+
+config SHADOW_CALL_STACK_VMAP
+ bool "Use virtually mapped shadow call stacks"
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..c5572fd770b0
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index 9e5cbe5eab7b..cbd40460e903 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index f2cc0d118a0b..06231f936a7a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index 2508a4f238a3..bf52528f9b36 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -454,6 +455,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -837,6 +840,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -896,6 +901,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 90e4b00ace89..a181c536e12e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6038,6 +6039,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..28abed21950c
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize the risk of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline unsigned long *scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(void *s)
+{
+ *scs_magic(s) = SCS_END_MAGIC;
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+ void *s;
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ goto out;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+
+out:
+ if (s)
+ scs_set_magic(s);
+ /* TODO: poison for KASAN, unpoison in scs_free */
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_SCS; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup) < 0);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ scs_set_magic(s);
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static inline void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.24.0.393.g34dc348eaf-goog
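
(A purely illustrative aside on the __noscs attribute added in the
patch above: a minimal, hypothetical sketch of opting a single
function out of SCS instrumentation, assuming the patch is applied.
The function name and body are made up for the example.)

#include <linux/compiler.h>	/* pulls in __noscs via compiler_types.h */

/*
 * Hypothetical example: exclude one function from shadow call stack
 * instrumentation, e.g. code that runs before the shadow stack exists.
 */
static void __noscs early_board_putc(char c)
{
	/* ... write c to an early debug UART ... */
}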

2019-12-06 22:15:11

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 06/15] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 98a31bafc8a2..874a8b428438 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 89d8ff06c9ce..dfeb8a5a7f7e 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 28abed21950c..5245e992c692 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -12,6 +12,7 @@
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -89,6 +90,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -135,6 +141,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -153,6 +164,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -162,6 +179,8 @@ int scs_prepare(struct task_struct *tsk, int node)
return -ENOMEM;

task_set_scs(tsk, s);
+ scs_account(tsk, 1);
+
return 0;
}

@@ -182,6 +201,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
task_set_scs(tsk, NULL);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 4785a8a2040e..6073556fc99a 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5365,6 +5365,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5387,6 +5390,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 78d53378db99..d0650391c8c1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:15:16

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 07/15] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
also prints out the highest shadow stack usage per process.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 5245e992c692..ad74d13f2c0f 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
unsigned long *magic = scs_magic(__scs_base(tsk));
@@ -200,6 +238,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:15:39

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 15/15] arm64: scs: add shadow stacks for SDEI

This change adds per-CPU shadow call stacks for the SDEI handler.
Similarly to how the kernel stacks are handled, we add separate shadow
stacks for normal and critical events.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/include/asm/scs.h | 2 +
arch/arm64/kernel/entry.S | 14 ++++-
arch/arm64/kernel/scs.c | 106 +++++++++++++++++++++++++++++------
arch/arm64/kernel/sdei.c | 7 +++
4 files changed, 112 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index c50d2b0c6c5f..8e327e14bc15 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,6 +9,7 @@
#ifdef CONFIG_SHADOW_CALL_STACK

extern void scs_init_irq(void);
+extern int scs_init_sdei(void);

static __always_inline void scs_save(struct task_struct *tsk)
{
@@ -27,6 +28,7 @@ static inline void scs_overflow_check(struct task_struct *tsk)
#else /* CONFIG_SHADOW_CALL_STACK */

static inline void scs_init_irq(void) {}
+static inline int scs_init_sdei(void) { return 0; }
static inline void scs_save(struct task_struct *tsk) {}
static inline void scs_overflow_check(struct task_struct *tsk) {}

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 7aa2d366b2df..9327c3d21b64 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1048,13 +1048,16 @@ ENTRY(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1064,6 +1067,15 @@ ENTRY(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
+ b 4f
+3: ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index eaadf5430baa..dddb7c56518b 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -10,31 +10,105 @@
#include <asm/pgtable.h>
#include <asm/scs.h>

-DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#define DECLARE_SCS(name) \
+ DECLARE_PER_CPU(unsigned long *, name ## _ptr); \
+ DECLARE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

-#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
-DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
- __aligned(SCS_SIZE);
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr)
+#else
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr); \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+ __aligned(SCS_SIZE)
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+DECLARE_SCS(irq_shadow_call_stack);
+DECLARE_SCS(sdei_shadow_call_stack_normal);
+DECLARE_SCS(sdei_shadow_call_stack_critical);
+
+DEFINE_SCS(irq_shadow_call_stack);
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
#endif

+static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ if (!p)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = p;
+
+ return 0;
+}
+
+static void scs_free_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p = per_cpu(*ptr, cpu);
+
+ if (p) {
+ per_cpu(*ptr, cpu) = NULL;
+ vfree(p);
+ }
+}
+
+static void scs_free_sdei(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ scs_free_percpu(&sdei_shadow_call_stack_normal_ptr, cpu);
+ scs_free_percpu(&sdei_shadow_call_stack_critical_ptr, cpu);
+ }
+}
+
void scs_init_irq(void)
{
int cpu;

for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
- unsigned long *p;
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP))
+ WARN_ON(scs_alloc_percpu(&irq_shadow_call_stack_ptr,
+ cpu));
+ else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+ }
+}

- p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
- VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL,
- 0, cpu_to_node(cpu),
- __builtin_return_address(0));
+int scs_init_sdei(void)
+{
+ int cpu;

- per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
-#else
- per_cpu(irq_shadow_call_stack_ptr, cpu) =
- per_cpu(irq_shadow_call_stack, cpu);
-#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP)) {
+ if (scs_alloc_percpu(
+ &sdei_shadow_call_stack_normal_ptr, cpu) ||
+ scs_alloc_percpu(
+ &sdei_shadow_call_stack_critical_ptr, cpu)) {
+ scs_free_sdei();
+ return -ENOMEM;
+ }
+ } else {
+ per_cpu(sdei_shadow_call_stack_normal_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_normal, cpu);
+ per_cpu(sdei_shadow_call_stack_critical_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_critical, cpu);
+ }
}
+
+ return 0;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d6259dac62b6..2854b9f7760a 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -13,6 +13,7 @@
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
@@ -162,6 +163,12 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}

+ if (scs_init_sdei()) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ free_sdei_stacks();
+ return 0;
+ }
+
sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:15:59

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 13/15] arm64: disable SCS for hypervisor code

Filter out CC_FLAGS_SCS for code that runs at a different exception
level.

Suggested-by: Steven Rostedt (VMware) <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/kvm/hyp/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index ea710f674cb6..17ea3da325e9 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -28,3 +28,6 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+
+# remove the SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:16:02

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 12/15] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:16:08

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 09/15] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 1fbe24d4fdb6..e69736fc1106 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -72,6 +72,10 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:16:08

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 08/15] arm64: disable function graph tracing with SCS

The graph tracer hooks returns by modifying frame records on the
(regular) stack, but with SCS the return address is taken from the
shadow stack, and the value in the frame record has no effect. As we
don't currently have a mechanism to determine the corresponding slot
on the shadow stack (and to pass this through the ftrace
infrastructure), for now let's disable the graph tracer when SCS is
enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/Kconfig | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1b4476ddb83..49e5f94ff4af 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -149,7 +149,7 @@ config ARM64
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_TRACER
select HAVE_FUNCTION_ERROR_INJECTION
- select HAVE_FUNCTION_GRAPH_TRACER
+ select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
select HAVE_GCC_PLUGINS
select HAVE_HW_BREAKPOINT if PERF_EVENTS
select HAVE_IRQ_TIME_ACCOUNTING
--
2.24.0.393.g34dc348eaf-goog

2019-12-06 22:16:15

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 10/15] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index fdabf40a83c8..5c8219c55948 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -49,6 +49,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
ENTRY(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -73,6 +75,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
ENDPROC(cpu_do_suspend)

@@ -89,6 +96,13 @@ ENTRY(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.24.0.393.g34dc348eaf-goog
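
For reference, a small userspace sketch (hypothetical struct name, not
from the patch) of the layout implied by growing NR_CTX_REGS to 13: the
existing twelve saved registers occupy offsets 0..95, so the x18 slot
written by "str x18, [x0, #96]" is the thirteenth entry.

        #include <stdio.h>

        struct cpu_suspend_ctx_sketch {
                unsigned long ctx_regs[13];     /* NR_CTX_REGS after this patch */
                unsigned long sp;
        };

        int main(void)
        {
                /* On a 64-bit target the thirteenth slot starts at 12 * 8 = 96. */
                printf("x18 slot offset = %zu, ctx size = %zu\n",
                       12 * sizeof(unsigned long),
                       sizeof(struct cpu_suspend_ctx_sketch));
                return 0;
        }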

2019-12-06 22:16:36

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 14/15] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 37 +++++++++++++++++++++++++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 31 +++++++++++++++++++--
arch/arm64/kernel/head.S | 9 +++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 40 ++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
11 files changed, 135 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 49e5f94ff4af..073b19db23f2 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -65,6 +65,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -994,6 +995,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..c50d2b0c6c5f
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void) {}
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..8c73764b9ed2 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fc6488660f64..08fafc4da2cf 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a5bdce8af65b..d485dc5cd196 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 583f71abbe98..7aa2d366b2df 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -172,6 +172,10 @@ alternative_cb_end

apply_ssbd 1, x22, x23

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -280,6 +284,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -385,6 +395,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x20, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -402,15 +415,24 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x18, x20
+#endif
.endm

/* GPRs used by entry code */
@@ -894,6 +916,11 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
ENDPROC(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..ca561de903d4 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -424,6 +425,10 @@ __primary_switched:
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -731,6 +736,10 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 71f788cd2b18..5f0aec285848 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -507,6 +508,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
uao_thread_switch(next);
ptrauth_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..eaadf5430baa
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index ab149bcc3dc7..416cf4da6f0c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -44,6 +44,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -356,6 +357,9 @@ void cpu_die(void)
{
unsigned int cpu = smp_processor_id();

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.24.0.393.g34dc348eaf-goog
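
As a rough userspace model (not the kernel code itself; the register is
simulated with a plain variable), the shadow-stack hand-off performed in
cpu_switch_to amounts to the following: save the outgoing task's pointer
into its thread_info, load the incoming task's saved pointer, and clear
the saved slot so the live value exists only in the register while the
task runs.

        #include <stddef.h>
        #include <stdio.h>

        struct task {                           /* stand-in for thread_info     */
                unsigned long *shadow_call_stack;
        };

        static unsigned long *x18;              /* models the reserved register */

        static void switch_scs(struct task *prev, struct task *next)
        {
                prev->shadow_call_stack = x18;  /* str x18, [x0, #TSK_TI_SCS]   */
                x18 = next->shadow_call_stack;  /* ldr x18, [x1, #TSK_TI_SCS]   */
                next->shadow_call_stack = NULL; /* str xzr, [x1, #TSK_TI_SCS]   */
        }

        int main(void)
        {
                unsigned long stack_a[4], stack_b[4];
                struct task a = { stack_a }, b = { stack_b };

                x18 = a.shadow_call_stack;      /* task a is currently running  */
                switch_scs(&a, &b);
                printf("a saved %p, live x18 %p, b slot cleared: %d\n",
                       (void *)a.shadow_call_stack, (void *)x18,
                       b.shadow_call_stack == NULL);
                return 0;
        }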

2019-12-06 22:16:40

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v6 11/15] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18 and SCS is enabled, restore the register
before jumping back to instrumented code. This is safe, because the
wrapper is called with preemption disabled and a separate shadow stack
is used for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..62f0260f5c17 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /*
+ * Restore x18 before returning to instrumented code. This is
+ * safe because the wrapper is called with preemption disabled and
+ * a separate shadow stack is used for interrupts.
+ */
+ mov x18, x2
+#endif
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.24.0.393.g34dc348eaf-goog
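
A minimal userspace model (not the wrapper itself; the values are made
up) of the check-and-restore flow described above: the value of x18 is
stashed before the firmware call, and if the firmware clobbered the
register, the saved copy is put back before returning to instrumented
code.

        #include <stdio.h>

        static unsigned long x18 = 0xffff000011223344UL; /* hypothetical SCS pointer */

        static void efi_runtime_service(void)
        {
                x18 = 0xdeadbeef;       /* a buggy firmware call corrupts x18 */
        }

        int main(void)
        {
                unsigned long saved = x18;      /* stashed before the call    */

                efi_runtime_service();
                if (x18 != saved) {             /* cmp + b.ne in the wrapper  */
                        x18 = saved;            /* restore before returning   */
                        printf("efi: x18 corrupted, restored to %#lx\n", x18);
                }
                return 0;
        }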

2020-01-16 17:44:24

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 10/15] arm64: preserve x18 when CPU is suspended

On Fri, Dec 06, 2019 at 02:13:46PM -0800, Sami Tolvanen wrote:
> Don't lose the current task's shadow stack when the CPU is suspended.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Nick Desaulniers <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Mark Rutland <[email protected]>
> ---
> arch/arm64/include/asm/suspend.h | 2 +-
> arch/arm64/mm/proc.S | 14 ++++++++++++++
> 2 files changed, 15 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
> index 8939c87c4dce..0cde2f473971 100644
> --- a/arch/arm64/include/asm/suspend.h
> +++ b/arch/arm64/include/asm/suspend.h
> @@ -2,7 +2,7 @@
> #ifndef __ASM_SUSPEND_H
> #define __ASM_SUSPEND_H
>
> -#define NR_CTX_REGS 12
> +#define NR_CTX_REGS 13
> #define NR_CALLEE_SAVED_REGS 12
>
> /*
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index fdabf40a83c8..5c8219c55948 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -49,6 +49,8 @@
> * cpu_do_suspend - save CPU registers context
> *
> * x0: virtual address of context pointer
> + *
> + * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
> */
> ENTRY(cpu_do_suspend)
> mrs x2, tpidr_el0
> @@ -73,6 +75,11 @@ alternative_endif
> stp x8, x9, [x0, #48]
> stp x10, x11, [x0, #64]
> stp x12, x13, [x0, #80]
> + /*
> + * Save x18 as it may be used as a platform register, e.g. by shadow
> + * call stack.
> + */
> + str x18, [x0, #96]
> ret
> ENDPROC(cpu_do_suspend)
>
> @@ -89,6 +96,13 @@ ENTRY(cpu_do_resume)
> ldp x9, x10, [x0, #48]
> ldp x11, x12, [x0, #64]
> ldp x13, x14, [x0, #80]
> + /*
> + * Restore x18, as it may be used as a platform register, and clear
> + * the buffer to minimize the risk of exposure when used for shadow
> + * call stack.
> + */
> + ldr x18, [x0, #96]
> + str xzr, [x0, #96]

Mumble, mumble, spectre-v4.

But I think it's fairly hopeless trying to fix that everywhere it crops up,
so:

Acked-by: Will Deacon <[email protected]>

Will

2020-01-16 17:49:21

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 13/15] arm64: disable SCS for hypervisor code

On Fri, Dec 06, 2019 at 02:13:49PM -0800, Sami Tolvanen wrote:
> Filter out CC_FLAGS_SCS for code that runs at a different exception
> level.
>
> Suggested-by: Steven Rostedt (VMware) <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Mark Rutland <[email protected]>
> ---
> arch/arm64/kvm/hyp/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> index ea710f674cb6..17ea3da325e9 100644
> --- a/arch/arm64/kvm/hyp/Makefile
> +++ b/arch/arm64/kvm/hyp/Makefile
> @@ -28,3 +28,6 @@ GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> KCOV_INSTRUMENT := n
> +
> +# remove the SCS flags from all objects in this directory
> +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))

Same comment as for the vDSO; can we remove the -ffixed-x18 as well?

Will

2020-01-16 17:50:35

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 15/15] arm64: scs: add shadow stacks for SDEI

[+James, since this needs his Ack before it can be merged]

On Fri, Dec 06, 2019 at 02:13:51PM -0800, Sami Tolvanen wrote:
> This change adds per-CPU shadow call stacks for the SDEI handler.
> Similarly to how the kernel stacks are handled, we add separate shadow
> stacks for normal and critical events.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> arch/arm64/include/asm/scs.h | 2 +
> arch/arm64/kernel/entry.S | 14 ++++-
> arch/arm64/kernel/scs.c | 106 +++++++++++++++++++++++++++++------
> arch/arm64/kernel/sdei.c | 7 +++
> 4 files changed, 112 insertions(+), 17 deletions(-)
>
> diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
> index c50d2b0c6c5f..8e327e14bc15 100644
> --- a/arch/arm64/include/asm/scs.h
> +++ b/arch/arm64/include/asm/scs.h
> @@ -9,6 +9,7 @@
> #ifdef CONFIG_SHADOW_CALL_STACK
>
> extern void scs_init_irq(void);
> +extern int scs_init_sdei(void);
>
> static __always_inline void scs_save(struct task_struct *tsk)
> {
> @@ -27,6 +28,7 @@ static inline void scs_overflow_check(struct task_struct *tsk)
> #else /* CONFIG_SHADOW_CALL_STACK */
>
> static inline void scs_init_irq(void) {}
> +static inline int scs_init_sdei(void) { return 0; }
> static inline void scs_save(struct task_struct *tsk) {}
> static inline void scs_overflow_check(struct task_struct *tsk) {}
>
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 7aa2d366b2df..9327c3d21b64 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -1048,13 +1048,16 @@ ENTRY(__sdei_asm_handler)
>
> mov x19, x1
>
> +#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
> + ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
> +#endif
> +
> #ifdef CONFIG_VMAP_STACK
> /*
> * entry.S may have been using sp as a scratch register, find whether
> * this is a normal or critical event and switch to the appropriate
> * stack for this CPU.
> */
> - ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
> cbnz w4, 1f
> ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
> b 2f
> @@ -1064,6 +1067,15 @@ ENTRY(__sdei_asm_handler)
> mov sp, x5
> #endif
>
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + /* Use a separate shadow call stack for normal and critical events */
> + cbnz w4, 3f
> + ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
> + b 4f
> +3: ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
> +4:
> +#endif
> +
> /*
> * We may have interrupted userspace, or a guest, or exit-from or
> * return-to either of these. We can't trust sp_el0, restore it.
> diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
> index eaadf5430baa..dddb7c56518b 100644
> --- a/arch/arm64/kernel/scs.c
> +++ b/arch/arm64/kernel/scs.c
> @@ -10,31 +10,105 @@
> #include <asm/pgtable.h>
> #include <asm/scs.h>
>
> -DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
> +#define DECLARE_SCS(name) \
> + DECLARE_PER_CPU(unsigned long *, name ## _ptr); \
> + DECLARE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)
>
> -#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
> -DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
> - __aligned(SCS_SIZE);
> +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
> +#define DEFINE_SCS(name) \
> + DEFINE_PER_CPU(unsigned long *, name ## _ptr)
> +#else
> +/* Allocate a static per-CPU shadow stack */
> +#define DEFINE_SCS(name) \
> + DEFINE_PER_CPU(unsigned long *, name ## _ptr); \
> + DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
> + __aligned(SCS_SIZE)
> +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +DECLARE_SCS(irq_shadow_call_stack);
> +DECLARE_SCS(sdei_shadow_call_stack_normal);
> +DECLARE_SCS(sdei_shadow_call_stack_critical);
> +
> +DEFINE_SCS(irq_shadow_call_stack);
> +#ifdef CONFIG_ARM_SDE_INTERFACE
> +DEFINE_SCS(sdei_shadow_call_stack_normal);
> +DEFINE_SCS(sdei_shadow_call_stack_critical);
> #endif
>
> +static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
> +{
> + unsigned long *p;
> +
> + p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> + VMALLOC_START, VMALLOC_END,
> + GFP_SCS, PAGE_KERNEL,
> + 0, cpu_to_node(cpu),
> + __builtin_return_address(0));
> +
> + if (!p)
> + return -ENOMEM;
> + per_cpu(*ptr, cpu) = p;
> +
> + return 0;
> +}
> +
> +static void scs_free_percpu(unsigned long * __percpu *ptr, int cpu)
> +{
> + unsigned long *p = per_cpu(*ptr, cpu);
> +
> + if (p) {
> + per_cpu(*ptr, cpu) = NULL;
> + vfree(p);
> + }
> +}
> +
> +static void scs_free_sdei(void)
> +{
> + int cpu;
> +
> + for_each_possible_cpu(cpu) {
> + scs_free_percpu(&sdei_shadow_call_stack_normal_ptr, cpu);
> + scs_free_percpu(&sdei_shadow_call_stack_critical_ptr, cpu);
> + }
> +}
> +
> void scs_init_irq(void)
> {
> int cpu;
>
> for_each_possible_cpu(cpu) {
> -#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
> - unsigned long *p;
> + if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP))
> + WARN_ON(scs_alloc_percpu(&irq_shadow_call_stack_ptr,
> + cpu));
> + else
> + per_cpu(irq_shadow_call_stack_ptr, cpu) =
> + per_cpu(irq_shadow_call_stack, cpu);
> + }
> +}
>
> - p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> - VMALLOC_START, VMALLOC_END,
> - GFP_SCS, PAGE_KERNEL,
> - 0, cpu_to_node(cpu),
> - __builtin_return_address(0));
> +int scs_init_sdei(void)
> +{
> + int cpu;
>
> - per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
> -#else
> - per_cpu(irq_shadow_call_stack_ptr, cpu) =
> - per_cpu(irq_shadow_call_stack, cpu);
> -#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
> + if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
> + return 0;
> +
> + for_each_possible_cpu(cpu) {
> + if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP)) {
> + if (scs_alloc_percpu(
> + &sdei_shadow_call_stack_normal_ptr, cpu) ||
> + scs_alloc_percpu(
> + &sdei_shadow_call_stack_critical_ptr, cpu)) {
> + scs_free_sdei();
> + return -ENOMEM;
> + }
> + } else {
> + per_cpu(sdei_shadow_call_stack_normal_ptr, cpu) =
> + per_cpu(sdei_shadow_call_stack_normal, cpu);
> + per_cpu(sdei_shadow_call_stack_critical_ptr, cpu) =
> + per_cpu(sdei_shadow_call_stack_critical, cpu);
> + }
> }
> +
> + return 0;
> }
> diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
> index d6259dac62b6..2854b9f7760a 100644
> --- a/arch/arm64/kernel/sdei.c
> +++ b/arch/arm64/kernel/sdei.c
> @@ -13,6 +13,7 @@
> #include <asm/kprobes.h>
> #include <asm/mmu.h>
> #include <asm/ptrace.h>
> +#include <asm/scs.h>
> #include <asm/sections.h>
> #include <asm/stacktrace.h>
> #include <asm/sysreg.h>
> @@ -162,6 +163,12 @@ unsigned long sdei_arch_get_entry_point(int conduit)
> return 0;
> }
>
> + if (scs_init_sdei()) {
> + if (IS_ENABLED(CONFIG_VMAP_STACK))
> + free_sdei_stacks();
> + return 0;
> + }
> +
> sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;
>
> #ifdef CONFIG_UNMAP_KERNEL_AT_EL0
> --
> 2.24.0.393.g34dc348eaf-goog
>

2020-01-16 18:13:12

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 09/15] arm64: reserve x18 from general allocation with SCS

On Fri, Dec 06, 2019 at 02:13:45PM -0800, Sami Tolvanen wrote:
> Reserve the x18 register from general allocation when SCS is enabled,
> because the compiler uses the register to store the current task's
> shadow stack pointer. Note that all external kernel modules must also be
> compiled with -ffixed-x18 if the kernel has SCS enabled.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Nick Desaulniers <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/Makefile | 4 ++++
> 1 file changed, 4 insertions(+)

Acked-by: Will Deacon <[email protected]>

Will

2020-01-16 18:16:11

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v6 12/15] arm64: vdso: disable Shadow Call Stack

On Thu, Jan 16, 2020 at 9:46 AM Will Deacon <[email protected]> wrote:
> Should we be removing -ffixed-x18 too, or does that not propagate here
> anyway?

No, we shouldn't touch -ffixed-x18 here. The vDSO is always built with
x18 reserved since commit 98cd3c3f83fbb ("arm64: vdso: Build vDSO with
-ffixed-x18").

Sami

2020-01-16 18:26:32

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 00/15] add support for Clang's Shadow Call Stack

On Fri, Dec 06, 2019 at 02:13:36PM -0800, Sami Tolvanen wrote:
> This patch series adds support for Clang's Shadow Call Stack
> (SCS) mitigation, which uses a separately allocated shadow stack
> to protect against return address overwrites. More information
> can be found here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html

I've queued the first four via arm64.

Will

2020-01-16 21:34:03

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 08/15] arm64: disable function graph tracing with SCS

On Fri, Dec 06, 2019 at 02:13:44PM -0800, Sami Tolvanen wrote:
> The graph tracer hooks returns by modifying frame records on the
> (regular) stack, but with SCS the return address is taken from the
> shadow stack, and the value in the frame record has no effect. As we
> don't currently have a mechanism to determine the corresponding slot
> on the shadow stack (and to pass this through the ftrace
> infrastructure), for now let's disable the graph tracer when SCS is
> enabled.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Mark Rutland <[email protected]>
> ---
> arch/arm64/Kconfig | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> index b1b4476ddb83..49e5f94ff4af 100644
> --- a/arch/arm64/Kconfig
> +++ b/arch/arm64/Kconfig
> @@ -149,7 +149,7 @@ config ARM64
> select HAVE_FTRACE_MCOUNT_RECORD
> select HAVE_FUNCTION_TRACER
> select HAVE_FUNCTION_ERROR_INJECTION
> - select HAVE_FUNCTION_GRAPH_TRACER
> + select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
> select HAVE_GCC_PLUGINS
> select HAVE_HW_BREAKPOINT if PERF_EVENTS
> select HAVE_IRQ_TIME_ACCOUNTING

I think this is the wrong way around, as we support the graph tracer
today and so I think SHADOW_CALL_STACK should depend on !GRAPH_TRACER
and possibly even EXPERT until this is resolved.

Will

2020-01-16 22:06:16

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 12/15] arm64: vdso: disable Shadow Call Stack

On Fri, Dec 06, 2019 at 02:13:48PM -0800, Sami Tolvanen wrote:
> Shadow stacks are only available in the kernel, so disable SCS
> instrumentation for the vDSO.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Nick Desaulniers <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Mark Rutland <[email protected]>
> ---
> arch/arm64/kernel/vdso/Makefile | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
> index dd2514bb1511..a87a4f11724e 100644
> --- a/arch/arm64/kernel/vdso/Makefile
> +++ b/arch/arm64/kernel/vdso/Makefile
> @@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING
>
> VDSO_LDFLAGS := -Bsymbolic
>
> -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
> +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)

Should we be removing -ffixed-x18 too, or does that not propagate here
anyway?

Will

2020-01-16 22:12:06

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 11/15] arm64: efi: restore x18 if it was corrupted

On Fri, Dec 06, 2019 at 02:13:47PM -0800, Sami Tolvanen wrote:
> If we detect a corrupted x18 and SCS is enabled, restore the register
> before jumping back to instrumented code. This is safe, because the
> wrapper is called with preemption disabled and a separate shadow stack
> is used for interrupt handling.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
> 1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
> index 3fc71106cb2b..62f0260f5c17 100644
> --- a/arch/arm64/kernel/efi-rt-wrapper.S
> +++ b/arch/arm64/kernel/efi-rt-wrapper.S
> @@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
> ldp x29, x30, [sp], #32
> b.ne 0f
> ret
> -0: b efi_handle_corrupted_x18 // tail call
> +0:
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + /*
> + * Restore x18 before returning to instrumented code. This is
> + * safe because the wrapper is called with preemption disabled and
> + * a separate shadow stack is used for interrupts.
> + */
> + mov x18, x2
> +#endif

Why not restore it regardless of CONFIG_SHADOW_CALL_STACK?

Will

2020-01-16 23:24:14

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v6 13/15] arm64: disable SCS for hypervisor code

On Thu, Jan 16, 2020 at 9:47 AM Will Deacon <[email protected]> wrote:
>
> On Fri, Dec 06, 2019 at 02:13:49PM -0800, Sami Tolvanen wrote:
> > Filter out CC_FLAGS_SCS for code that runs at a different exception
> > level.
> >
> > Suggested-by: Steven Rostedt (VMware) <[email protected]>
> > Signed-off-by: Sami Tolvanen <[email protected]>
> > Reviewed-by: Kees Cook <[email protected]>
> > Reviewed-by: Mark Rutland <[email protected]>
> > ---
> > arch/arm64/kvm/hyp/Makefile | 3 +++
> > 1 file changed, 3 insertions(+)
> >
> > diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> > index ea710f674cb6..17ea3da325e9 100644
> > --- a/arch/arm64/kvm/hyp/Makefile
> > +++ b/arch/arm64/kvm/hyp/Makefile
> > @@ -28,3 +28,6 @@ GCOV_PROFILE := n
> > KASAN_SANITIZE := n
> > UBSAN_SANITIZE := n
> > KCOV_INSTRUMENT := n
> > +
> > +# remove the SCS flags from all objects in this directory
> > +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
>
> Same comment as for the vDSO; can we remove the -ffixed-x18 as well?

Sure, I don't see why not. I'll change this in the next version.

Sami

2020-01-16 23:25:44

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v6 11/15] arm64: efi: restore x18 if it was corrupted

On Thu, Jan 16, 2020 at 9:45 AM Will Deacon <[email protected]> wrote:
>
> On Fri, Dec 06, 2019 at 02:13:47PM -0800, Sami Tolvanen wrote:
> > -0: b efi_handle_corrupted_x18 // tail call
> > +0:
> > +#ifdef CONFIG_SHADOW_CALL_STACK
> > + /*
> > + * Restore x18 before returning to instrumented code. This is
> > + * safe because the wrapper is called with preemption disabled and
> > + * a separate shadow stack is used for interrupts.
> > + */
> > + mov x18, x2
> > +#endif
>
> Why not restore it regardless of CONFIG_SHADOW_CALL_STACK?

The ifdefs are here only because restoring the register without SCS
isn't actually necessary, but I'm fine with dropping them (and editing
the comment) in the next version if you prefer.

Sami

2020-01-16 23:26:08

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v6 14/15] arm64: implement Shadow Call Stack

On Thu, Jan 16, 2020 at 10:24 AM Will Deacon <[email protected]> wrote:
> > .macro irq_stack_entry
> > mov x19, sp // preserve the original sp
> > +#ifdef CONFIG_SHADOW_CALL_STACK
> > + mov x20, x18 // preserve the original shadow stack
> > +#endif
>
> Hmm, not sure about corrupting x20 here. Doesn't it hold the PMR value from
> kernel_entry?

You're right, and it's used in el1_irq after irq_handler if
CONFIG_ARM64_PSEUDO_NMI is enabled. Thanks for pointing this out.
Looks like one of x24-x29 should be safe here, and the comment needs
to be updated to explain why x20-x23 shouldn't be corrupted.

Sami

2020-01-17 00:01:00

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 12/15] arm64: vdso: disable Shadow Call Stack

On Thu, Jan 16, 2020 at 10:14:24AM -0800, Sami Tolvanen wrote:
> On Thu, Jan 16, 2020 at 9:46 AM Will Deacon <[email protected]> wrote:
> > Should we be removing -ffixed-x18 too, or does that not propagate here
> > anyway?
>
> No, we shouldn't touch -ffixed-x18 here. The vDSO is always built with
> x18 reserved since commit 98cd3c3f83fbb ("arm64: vdso: Build vDSO with
> -ffixed-x18").

Thanks, in which case:

Acked-by: Will Deacon <[email protected]>

Will

2020-01-17 00:02:22

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v6 14/15] arm64: implement Shadow Call Stack

On Fri, Dec 06, 2019 at 02:13:50PM -0800, Sami Tolvanen wrote:
> This change implements shadow stack switching, initial SCS set-up,
> and interrupt shadow stacks for arm64.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/Kconfig | 5 ++++
> arch/arm64/include/asm/scs.h | 37 +++++++++++++++++++++++++
> arch/arm64/include/asm/thread_info.h | 3 +++
> arch/arm64/kernel/Makefile | 1 +
> arch/arm64/kernel/asm-offsets.c | 3 +++
> arch/arm64/kernel/entry.S | 31 +++++++++++++++++++--
> arch/arm64/kernel/head.S | 9 +++++++
> arch/arm64/kernel/irq.c | 2 ++
> arch/arm64/kernel/process.c | 2 ++
> arch/arm64/kernel/scs.c | 40 ++++++++++++++++++++++++++++
> arch/arm64/kernel/smp.c | 4 +++
> 11 files changed, 135 insertions(+), 2 deletions(-)
> create mode 100644 arch/arm64/include/asm/scs.h
> create mode 100644 arch/arm64/kernel/scs.c

[...]

> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 583f71abbe98..7aa2d366b2df 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -172,6 +172,10 @@ alternative_cb_end
>
> apply_ssbd 1, x22, x23
>
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
> + str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
> +#endif
> .else
> add x21, sp, #S_FRAME_SIZE
> get_current_task tsk
> @@ -280,6 +284,12 @@ alternative_else_nop_endif
> ct_user_enter
> .endif
>
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + .if \el == 0
> + str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
> + .endif
> +#endif
> +
> #ifdef CONFIG_ARM64_SW_TTBR0_PAN
> /*
> * Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
> @@ -385,6 +395,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
>
> .macro irq_stack_entry
> mov x19, sp // preserve the original sp
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + mov x20, x18 // preserve the original shadow stack
> +#endif

Hmm, not sure about corrupting x20 here. Doesn't it hold the PMR value from
kernel_entry?

Rest of the patch looks ok, but I'll do a proper review when it's closer to
being merged as we've got a bunch of other entry changes in the pipeline.

Will

2020-01-17 02:09:18

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v6 08/15] arm64: disable function graph tracing with SCS

On Thu, Jan 16, 2020 at 9:39 AM Will Deacon <[email protected]> wrote:
>
> On Fri, Dec 06, 2019 at 02:13:44PM -0800, Sami Tolvanen wrote:
> > diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
> > index b1b4476ddb83..49e5f94ff4af 100644
> > --- a/arch/arm64/Kconfig
> > +++ b/arch/arm64/Kconfig
> > @@ -149,7 +149,7 @@ config ARM64
> > select HAVE_FTRACE_MCOUNT_RECORD
> > select HAVE_FUNCTION_TRACER
> > select HAVE_FUNCTION_ERROR_INJECTION
> > - select HAVE_FUNCTION_GRAPH_TRACER
> > + select HAVE_FUNCTION_GRAPH_TRACER if !SHADOW_CALL_STACK
> > select HAVE_GCC_PLUGINS
> > select HAVE_HW_BREAKPOINT if PERF_EVENTS
> > select HAVE_IRQ_TIME_ACCOUNTING
>
> I think this is the wrong way around, as we support the graph tracer
> today and so I think SHADOW_CALL_STACK should depend on !GRAPH_TRACER
> and possibly even EXPERT until this is resolved.

Sure, sounds reasonable. I'll change this in the next version.

Sami

2020-01-28 18:50:46

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 00/11] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by inactive tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
in the kernel stack to alter control flow, such as function
graph tracing, although it may be possible to later change these
features to modify the shadow stack instead. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.
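
As a rough illustration (a userspace model, not actual compiler output),
the per-function instrumentation described above behaves like a second,
dedicated return-address stack addressed through x18:

        #include <stdio.h>

        static unsigned long shadow_stack[128];
        static unsigned long *scs_sp = shadow_stack;    /* models x18 */

        static void prologue(unsigned long retaddr)
        {
                *scs_sp++ = retaddr;    /* str x30, [x18], #8           */
        }

        static unsigned long epilogue(void)
        {
                return *--scs_sp;       /* ldr x30, [x18, #-8]!         */
        }

        int main(void)
        {
                prologue(0x1234);       /* hypothetical return address  */
                printf("returning to %#lx\n", epilogue());
                return 0;
        }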

Changes in v7:
- Changed irq_stack_entry/exit to store the shadow stack pointer
in x24 instead of x20 as kernel_entry uses x20-x23 to store
data that can be used later. Updated the comment as well.
- Changed the Makefile in arch/arm64/kvm/hyp to also filter out
-ffixed-x18.
- Changed SHADOW_CALL_STACK to depend on !FUNCTION_GRAPH_TRACER
instead of not selecting HAVE_FUNCTION_GRAPH_TRACER with SCS.
- Removed ifdefs from the EFI wrapper and updated the comment to
explain why we are restoring x18.
- Rebased as Ard's x18 patches that were part of this series have
already been merged.

Changes in v6:
- Updated comment in the EFI RT wrapper to include the
explanation from the commit message.
- Fixed the SHADOW_CALL_STACK_VMAP config option and the
compilation errors in scs_init_irq()
- Updated the comment in entry.S to Mark's suggestion
- Fixed the WARN_ON in scs_init() to trip only when the return
value for cpuhp_setup_state() is < 0.
- Removed ifdefs from the code in arch/arm64/kernel/scs.c and
added separate shadow stacks for the SDEI handler

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved scs_set_magic before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Sami Tolvanen (11):
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
scs: disable when function graph tracing is enabled
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack
arm64: scs: add shadow stacks for SDEI

Makefile | 6 +
arch/Kconfig | 35 ++++
arch/arm64/Kconfig | 5 +
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/scs.h | 39 +++++
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/efi-rt-wrapper.S | 11 +-
arch/arm64/kernel/entry.S | 46 ++++-
arch/arm64/kernel/head.S | 9 +
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 114 +++++++++++++
arch/arm64/kernel/sdei.c | 7 +
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/kvm/hyp/Makefile | 3 +
arch/arm64/mm/proc.S | 14 ++
drivers/base/node.c | 6 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 57 +++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 +
kernel/sched/core.c | 2 +
kernel/scs.c | 246 +++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
33 files changed, 661 insertions(+), 6 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: b0be0eff1a5ab77d588b76bd8b1c92d5d17b3f73
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:50:48

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 01/11] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 34 ++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 57 ++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
10 files changed, 314 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 6a01b073915e..b2a1e5b704f4 100644
--- a/Makefile
+++ b/Makefile
@@ -846,6 +846,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 48b5e103bdb0..1b16aa9a3fe5 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -521,6 +521,40 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found in
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable of reading and writing arbitrary memory may
+ be able to locate them and hijack control flow by modifying shadow
+ stacks that are not currently in use.
+
+config SHADOW_CALL_STACK_VMAP
+ bool "Use virtually mapped shadow call stacks"
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..c5572fd770b0
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index 9e5cbe5eab7b..cbd40460e903 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index f2cc0d118a0b..06231f936a7a 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -102,6 +102,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index ef82feb4bddc..2809e9f9f46b 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -454,6 +455,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -837,6 +840,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -896,6 +901,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 90e4b00ace89..a181c536e12e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6038,6 +6039,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..28abed21950c
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize the risk of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline unsigned long *scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(void *s)
+{
+ *scs_magic(s) = SCS_END_MAGIC;
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+ void *s;
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ goto out;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+
+out:
+ if (s)
+ scs_set_magic(s);
+ /* TODO: poison for KASAN, unpoison in scs_free */
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_SCS; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup) < 0);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ scs_set_magic(s);
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static inline void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.25.0.341.g760bfbb309-goog
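
The __scs_base() masking in kernel/scs.c above relies only on alignment
arithmetic, which a standalone sketch (hypothetical pointer value) makes
concrete: because every shadow stack is SCS_SIZE-aligned and grows
upwards, clearing the low bits of any live pointer into it recovers the
base, even after the task has pushed some frames.

        #include <stdio.h>

        #define SCS_SIZE 1024UL

        int main(void)
        {
                unsigned long sp = 0xffff00000a2d34a8UL; /* hypothetical x18 value */
                unsigned long base = sp & ~(SCS_SIZE - 1);

                printf("base = %#lx, bytes in use = %lu\n", base, sp - base);
                return 0;
        }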

2020-01-28 18:50:53

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 02/11] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 98a31bafc8a2..874a8b428438 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 5334ad8fc7bd..1a379a0f2940 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 28abed21950c..5245e992c692 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -12,6 +12,7 @@
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -89,6 +90,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -135,6 +141,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -153,6 +164,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -162,6 +179,8 @@ int scs_prepare(struct task_struct *tsk, int node)
return -ENOMEM;

task_set_scs(tsk, s);
+ scs_account(tsk, 1);
+
return 0;
}

@@ -182,6 +201,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
task_set_scs(tsk, NULL);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d047bf7d8fd4..284e428e71c8 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5340,6 +5340,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5362,6 +5365,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 78d53378db99..d0650391c8c1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:50:57

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 03/11] scs: add support for stack usage debugging

This change implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When
enabled, the kernel also prints out the highest shadow stack usage per
process.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 5245e992c692..ad74d13f2c0f 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
unsigned long *magic = scs_magic(__scs_base(tsk));
@@ -200,6 +238,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:51:04

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 04/11] scs: disable when function graph tracing is enabled

The graph tracer hooks returns by modifying frame records on the
(regular) stack, but with SCS the return address is taken from the
shadow stack, and the value in the frame record has no effect. As we
don't currently have a mechanism to determine the corresponding slot
on the shadow stack (and to pass this through the ftrace
infrastructure), for now let's disable SCS when the graph tracer is
enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 1b16aa9a3fe5..0d746373c52e 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -530,6 +530,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK

config SHADOW_CALL_STACK
bool "Clang Shadow Call Stack"
+ depends on !FUNCTION_GRAPH_TRACER
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
help
This option enables Clang's Shadow Call Stack, which uses a
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:51:09

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 06/11] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index aafed6902411..7d37e3c70ff5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -56,6 +56,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
SYM_FUNC_START(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -80,6 +82,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
SYM_FUNC_END(cpu_do_suspend)

@@ -96,6 +103,13 @@ SYM_FUNC_START(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:51:16

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 07/11] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18, restore the register before jumping back
to potentially SCS instrumented code. This is safe, because the wrapper
is called with preemption disabled and a separate shadow stack is used
for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..6ca6c0dc11a1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:51:51

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 11/11] arm64: scs: add shadow stacks for SDEI

This change adds per-CPU shadow call stacks for the SDEI handler.
Similarly to how the kernel stacks are handled, we add separate shadow
stacks for normal and critical events.

Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/include/asm/scs.h | 2 +
arch/arm64/kernel/entry.S | 14 ++++-
arch/arm64/kernel/scs.c | 106 +++++++++++++++++++++++++++++------
arch/arm64/kernel/sdei.c | 7 +++
4 files changed, 112 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index c50d2b0c6c5f..8e327e14bc15 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,6 +9,7 @@
#ifdef CONFIG_SHADOW_CALL_STACK

extern void scs_init_irq(void);
+extern int scs_init_sdei(void);

static __always_inline void scs_save(struct task_struct *tsk)
{
@@ -27,6 +28,7 @@ static inline void scs_overflow_check(struct task_struct *tsk)
#else /* CONFIG_SHADOW_CALL_STACK */

static inline void scs_init_irq(void) {}
+static inline int scs_init_sdei(void) { return 0; }
static inline void scs_save(struct task_struct *tsk) {}
static inline void scs_overflow_check(struct task_struct *tsk) {}

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index f9370d768494..42183895fb84 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1050,13 +1050,16 @@ ENTRY(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1066,6 +1069,15 @@ ENTRY(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
+ b 4f
+3: ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index eaadf5430baa..dddb7c56518b 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -10,31 +10,105 @@
#include <asm/pgtable.h>
#include <asm/scs.h>

-DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#define DECLARE_SCS(name) \
+ DECLARE_PER_CPU(unsigned long *, name ## _ptr); \
+ DECLARE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

-#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
-DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
- __aligned(SCS_SIZE);
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr)
+#else
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr); \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+ __aligned(SCS_SIZE)
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+DECLARE_SCS(irq_shadow_call_stack);
+DECLARE_SCS(sdei_shadow_call_stack_normal);
+DECLARE_SCS(sdei_shadow_call_stack_critical);
+
+DEFINE_SCS(irq_shadow_call_stack);
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
#endif

+static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ if (!p)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = p;
+
+ return 0;
+}
+
+static void scs_free_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p = per_cpu(*ptr, cpu);
+
+ if (p) {
+ per_cpu(*ptr, cpu) = NULL;
+ vfree(p);
+ }
+}
+
+static void scs_free_sdei(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ scs_free_percpu(&sdei_shadow_call_stack_normal_ptr, cpu);
+ scs_free_percpu(&sdei_shadow_call_stack_critical_ptr, cpu);
+ }
+}
+
void scs_init_irq(void)
{
int cpu;

for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
- unsigned long *p;
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP))
+ WARN_ON(scs_alloc_percpu(&irq_shadow_call_stack_ptr,
+ cpu));
+ else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+ }
+}

- p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
- VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL,
- 0, cpu_to_node(cpu),
- __builtin_return_address(0));
+int scs_init_sdei(void)
+{
+ int cpu;

- per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
-#else
- per_cpu(irq_shadow_call_stack_ptr, cpu) =
- per_cpu(irq_shadow_call_stack, cpu);
-#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP)) {
+ if (scs_alloc_percpu(
+ &sdei_shadow_call_stack_normal_ptr, cpu) ||
+ scs_alloc_percpu(
+ &sdei_shadow_call_stack_critical_ptr, cpu)) {
+ scs_free_sdei();
+ return -ENOMEM;
+ }
+ } else {
+ per_cpu(sdei_shadow_call_stack_normal_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_normal, cpu);
+ per_cpu(sdei_shadow_call_stack_critical_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_critical, cpu);
+ }
}
+
+ return 0;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d6259dac62b6..2854b9f7760a 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -13,6 +13,7 @@
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
@@ -162,6 +163,12 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}

+ if (scs_init_sdei()) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ free_sdei_stacks();
+ return 0;
+ }
+
sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:51:54

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 05/11] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index dca1a97751ab..ab26b448faa9 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -65,6 +65,10 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:52:32

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
different exception level.

Suggested-by: Steven Rostedt (VMware) <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/arm64/kvm/hyp/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index ea710f674cb6..5843adef9ef6 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -28,3 +28,6 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCOV_INSTRUMENT := n
+
+# remove the SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:52:44

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 08/11] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.25.0.341.g760bfbb309-goog

2020-01-28 18:52:46

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v7 10/11] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 37 +++++++++++++++++++++++++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 32 ++++++++++++++++++++--
arch/arm64/kernel/head.S | 9 +++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 40 ++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
11 files changed, 136 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 5616cab8dfda..147aa56dea02 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -65,6 +65,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -1020,6 +1021,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..c50d2b0c6c5f
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void) {}
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..8c73764b9ed2 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fc6488660f64..08fafc4da2cf 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a5bdce8af65b..d485dc5cd196 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 1b6b7a86625c..f9370d768494 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -177,6 +177,10 @@ alternative_cb_end

apply_ssbd 1, x22, x23

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -278,6 +282,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -383,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -400,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x18, x24
+#endif
.endm

/* GPRs used by entry code */
@@ -895,6 +918,11 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
ENDPROC(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..ca561de903d4 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -424,6 +425,10 @@ __primary_switched:
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -731,6 +736,10 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index bbb0f0c145f6..df45eb09845b 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -507,6 +508,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
uao_thread_switch(next);
ptrauth_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..eaadf5430baa
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d4ed9a19d8fe..f2cb344f998c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -46,6 +46,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -358,6 +359,9 @@ void cpu_die(void)
{
unsigned int cpu = smp_processor_id();

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.25.0.341.g760bfbb309-goog

2020-01-28 22:51:08

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v7 04/11] scs: disable when function graph tracing is enabled

On Tue, Jan 28, 2020 at 10:49:27AM -0800, Sami Tolvanen wrote:
> The graph tracer hooks returns by modifying frame records on the
> (regular) stack, but with SCS the return address is taken from the
> shadow stack, and the value in the frame record has no effect. As we
> don't currently have a mechanism to determine the corresponding slot
> on the shadow stack (and to pass this through the ftrace
> infrastructure), for now let's disable SCS when the graph tracer is
> enabled.
>
> Signed-off-by: Sami Tolvanen <[email protected]>

Reviewed-by: Kees Cook <[email protected]>

-Kees

> ---
> arch/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 1b16aa9a3fe5..0d746373c52e 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -530,6 +530,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>
> config SHADOW_CALL_STACK
> bool "Clang Shadow Call Stack"
> + depends on !FUNCTION_GRAPH_TRACER
> depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> help
> This option enables Clang's Shadow Call Stack, which uses a
> --
> 2.25.0.341.g760bfbb309-goog
>

--
Kees Cook

2020-02-10 16:43:09

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v7 07/11] arm64: efi: restore x18 if it was corrupted

On Tue, Jan 28, 2020 at 10:49:30AM -0800, Sami Tolvanen wrote:
> If we detect a corrupted x18, restore the register before jumping back
> to potentially SCS instrumented code. This is safe, because the wrapper
> is called with preemption disabled and a separate shadow stack is used
> for interrupt handling.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
> 1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
> index 3fc71106cb2b..6ca6c0dc11a1 100644
> --- a/arch/arm64/kernel/efi-rt-wrapper.S
> +++ b/arch/arm64/kernel/efi-rt-wrapper.S
> @@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
> ldp x29, x30, [sp], #32
> b.ne 0f
> ret
> -0: b efi_handle_corrupted_x18 // tail call
> +0:
> + /*
> + * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
> + * shadow stack pointer, which we need to restore before returning to
> + * potentially instrumented code. This is safe because the wrapper is
> + * called with preemption disabled and a separate shadow stack is used
> + * for interrupts.
> + */
> + mov x18, x2
> + b efi_handle_corrupted_x18 // tail call
> ENDPROC(__efi_rt_asm_wrapper)

Acked-by: Will Deacon <[email protected]>

Will

2020-02-10 16:45:13

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On Tue, Jan 28, 2020 at 10:49:32AM -0800, Sami Tolvanen wrote:
> Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
> different exception level.
>
> Suggested-by: Steven Rostedt (VMware) <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Mark Rutland <[email protected]>
> ---
> arch/arm64/kvm/hyp/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> index ea710f674cb6..5843adef9ef6 100644
> --- a/arch/arm64/kvm/hyp/Makefile
> +++ b/arch/arm64/kvm/hyp/Makefile
> @@ -28,3 +28,6 @@ GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> KCOV_INSTRUMENT := n
> +
> +# remove the SCS flags from all objects in this directory
> +KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))

Acked-by: Will Deacon <[email protected]>

Will

2020-02-10 17:19:37

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

Hi Sami,

On 28/01/2020 18:49, Sami Tolvanen wrote:
> Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
> different exception level.

Hmmm, there are two things being disabled here.

Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
shouldn't KVM's C code still treat x18 as a fixed register?


As you have an __attribute__((no_sanitize("shadow-call-stack"))), could we add that to
__hyp_text instead? (it's a smaller hammer!) All of KVM's EL2 code is marked __hyp_text,
but that isn't everything in these files. Doing it like this would leave KVM's VHE-only
paths covered.
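
A rough sketch of what that could look like (assuming __hyp_text is currently
defined in arch/arm64/include/asm/kvm_hyp.h roughly as below, and using the
__noscs helper this series adds to include/linux/compiler-clang.h -- exact
names and locations may differ):

/* include/linux/compiler-clang.h (added by this series) */
#define __noscs	__attribute__((__no_sanitize__("shadow-call-stack")))

/* arch/arm64/include/asm/kvm_hyp.h -- sketch only */
#define __hyp_text	__section(.hyp.text) notrace __noscs

That would drop the SCS instrumentation for everything marked __hyp_text
while still treating x18 as fixed for the whole directory.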

As an example, with VHE the kernel and KVM both run at EL2, and KVM behaves differently:
kvm_vcpu_put_sysregs() in kvm/hyp/sysreg-sr.c is called from a preempt notifier as
the EL2 registers are always accessible.


Thanks,

James

> diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
> index ea710f674cb6..5843adef9ef6 100644
> --- a/arch/arm64/kvm/hyp/Makefile
> +++ b/arch/arm64/kvm/hyp/Makefile
> @@ -28,3 +28,6 @@ GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> KCOV_INSTRUMENT := n
> +
> +# remove the SCS flags from all objects in this directory
> +KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))

2020-02-10 17:53:50

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On Mon, Feb 10, 2020 at 05:18:58PM +0000, James Morse wrote:
> On 28/01/2020 18:49, Sami Tolvanen wrote:
> > Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
> > different exception level.
>
> Hmmm, there are two things being disabled here.
>
> Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
> shouldn't KVM's C code still treat x18 as a fixed register?

My review of v6 suggested dropping the -ffixed-x18 as well, since it's only
introduced by SCS (in patch 5) and so isn't required by anything else. Why
do you think it's needed?

> As you have an __attribute__((no_sanitize("shadow-call-stack"))), could we add that to
> __hyp_text instead? (its a smaller hammer!) All of KVM's EL2 code is marked __hyp_text,
> but that isn't everything in these files. Doing it like this would leave KVM's VHE-only
> paths covered.
>
> As an example, with VHE the kernel and KVM both run at EL2, and KVM behaves differently:
> kvm_vcpu_put_sysregs() in kvm/hyp/sysreg-sr.c is called from a preempt notifier as
> the EL2 registers are always accessible.

That's a good point, and I agree that it would be nice to have SCS covering
the VHE paths. If you do that as a function attribute (which feels pretty
fragile to me), then I guess we'll have to keep the -ffixed-x18 for the
non-VHE code after all because GCC at least doesn't like having the register
saving ABI specified on a per-function basis.

Will

2020-02-10 18:04:17

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On Mon, Feb 10, 2020 at 05:52:15PM +0000, Will Deacon wrote:
> On Mon, Feb 10, 2020 at 05:18:58PM +0000, James Morse wrote:
> > On 28/01/2020 18:49, Sami Tolvanen wrote:
> > > Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
> > > different exception level.
> >
> > Hmmm, there are two things being disabled here.
> >
> > Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
> > shouldn't KVM's C code still treat x18 as a fixed register?
>
> My review of v6 suggested dropping the -ffixed-x18 as well, since it's only
> introduced by SCS (in patch 5) and so isn't required by anything else. Why
> do you think it's needed?

When EL1 code calls up to hyp, it expects x18 to be preserved across the
call, so hyp needs to either preserve it explicitly across transitions
from/to EL1 or always preserve it.

The latter is easiest since any code used by VHE hyp code will need x18
saved anyway (and so any common hyp code needs to).

Thanks,
Mark.

2020-02-10 18:08:39

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On Mon, Feb 10, 2020 at 06:03:28PM +0000, Mark Rutland wrote:
> On Mon, Feb 10, 2020 at 05:52:15PM +0000, Will Deacon wrote:
> > On Mon, Feb 10, 2020 at 05:18:58PM +0000, James Morse wrote:
> > > On 28/01/2020 18:49, Sami Tolvanen wrote:
> > > > Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
> > > > different exception level.
> > >
> > > Hmmm, there are two things being disabled here.
> > >
> > > Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
> > > shouldn't KVM's C code still treat x18 as a fixed register?
> >
> > My review of v6 suggested dropping the -ffixed-x18 as well, since it's only
> > introduced by SCS (in patch 5) and so isn't required by anything else. Why
> > do you think it's needed?
>
> When EL1 code calls up to hyp, it expects x18 to be preserved across the
> call, so hyp needs to either preserve it explicitly across a transitions
> from/to EL1 or always preserve it.

I thought we explicitly saved/restored it across the call after
af12376814a5 ("arm64: kvm: stop treating register x18 as caller save"). Is
that not sufficient?

> The latter is easiest since any code used by VHE hyp code will need x18
> saved anyway (ans so any common hyp code needs to).

I would personally prefer to split the VHE and non-VHE code so they can be
compiled with separate options.

Will

2020-02-10 18:25:00

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On Mon, Feb 10, 2020 at 06:07:41PM +0000, Will Deacon wrote:
> On Mon, Feb 10, 2020 at 06:03:28PM +0000, Mark Rutland wrote:
> > On Mon, Feb 10, 2020 at 05:52:15PM +0000, Will Deacon wrote:
> > > On Mon, Feb 10, 2020 at 05:18:58PM +0000, James Morse wrote:
> > > > On 28/01/2020 18:49, Sami Tolvanen wrote:
> > > > > Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
> > > > > different exception level.
> > > >
> > > > Hmmm, there are two things being disabled here.
> > > >
> > > > Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
> > > > shouldn't KVM's C code still treat x18 as a fixed register?
> > >
> > > My review of v6 suggested dropping the -ffixed-x18 as well, since it's only
> > > introduced by SCS (in patch 5) and so isn't required by anything else. Why
> > > do you think it's needed?
> >
> > When EL1 code calls up to hyp, it expects x18 to be preserved across the
> > call, so hyp needs to either preserve it explicitly across a transitions
> > from/to EL1 or always preserve it.
>
> I thought we explicitly saved/restored it across the call after
> af12376814a5 ("arm64: kvm: stop treating register x18 as caller save"). Is
> that not sufficient?

That covers the hyp->guest->hyp round trip, but not the host->hyp->host
portion surrounding that.

Anywhere we use __call_hyp(), we expect x18 to be preserved across the
call, and that's not only used to enter the guest. If we don't want to
do that naturally at EL2, we'd probably have to add the save/restore to
do_el2_call in arch/arm64/kvm/hyp/hyp-entry.S.

Thanks,
Mark.

2020-02-11 09:16:34

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On 2020-02-10 18:07, Will Deacon wrote:
> On Mon, Feb 10, 2020 at 06:03:28PM +0000, Mark Rutland wrote:
>> On Mon, Feb 10, 2020 at 05:52:15PM +0000, Will Deacon wrote:
>> > On Mon, Feb 10, 2020 at 05:18:58PM +0000, James Morse wrote:
>> > > On 28/01/2020 18:49, Sami Tolvanen wrote:
>> > > > Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
>> > > > different exception level.
>> > >
>> > > Hmmm, there are two things being disabled here.
>> > >
>> > > Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
>> > > shouldn't KVM's C code still treat x18 as a fixed register?
>> >
>> > My review of v6 suggested dropping the -ffixed-x18 as well, since it's only
>> > introduced by SCS (in patch 5) and so isn't required by anything else. Why
>> > do you think it's needed?
>>
>> When EL1 code calls up to hyp, it expects x18 to be preserved across
>> the
>> call, so hyp needs to either preserve it explicitly across a
>> transitions
>> from/to EL1 or always preserve it.
>
> I thought we explicitly saved/restored it across the call after
> af12376814a5 ("arm64: kvm: stop treating register x18 as caller save").
> Is
> that not sufficient?
>
>> The latter is easiest since any code used by VHE hyp code will need
>> x18
>> saved anyway (ans so any common hyp code needs to).
>
> I would personally prefer to split the VHE and non-VHE code so they can
> be
> compiled with separate options.

Splitting the two code paths to support different compilation options
and/or calling conventions is going to generate a lot of code duplication
(or at least object duplication), as they are intricately linked.

I'm not fundamentally opposed to that, but it should come with ways to
still manage it as a unified code base as much as possible, as well as
ways to discard the unused part at runtime (which should become easy to
do once we have two distinct sets of objects).

M.
--
Jazz is not dead. It just smells funny...

2020-02-11 09:55:40

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On Mon, Feb 10, 2020 at 06:24:32PM +0000, Mark Rutland wrote:
> On Mon, Feb 10, 2020 at 06:07:41PM +0000, Will Deacon wrote:
> > On Mon, Feb 10, 2020 at 06:03:28PM +0000, Mark Rutland wrote:
> > > On Mon, Feb 10, 2020 at 05:52:15PM +0000, Will Deacon wrote:
> > > > On Mon, Feb 10, 2020 at 05:18:58PM +0000, James Morse wrote:
> > > > > On 28/01/2020 18:49, Sami Tolvanen wrote:
> > > > > > Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
> > > > > > different exception level.
> > > > >
> > > > > Hmmm, there are two things being disabled here.
> > > > >
> > > > > Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
> > > > > shouldn't KVM's C code still treat x18 as a fixed register?
> > > >
> > > > My review of v6 suggested dropping the -ffixed-x18 as well, since it's only
> > > > introduced by SCS (in patch 5) and so isn't required by anything else. Why
> > > > do you think it's needed?
> > >
> > > When EL1 code calls up to hyp, it expects x18 to be preserved across the
> > > call, so hyp needs to either preserve it explicitly across a transitions
> > > from/to EL1 or always preserve it.
> >
> > I thought we explicitly saved/restored it across the call after
> > af12376814a5 ("arm64: kvm: stop treating register x18 as caller save"). Is
> > that not sufficient?
>
> That covers the hyp->guest->hyp round trip, but not the host->hyp->host
> portion surrounding that.

Thanks, I missed that. It's annoying that we'll end up needing /both/
-ffixed-x18 *and* the save/restore around guest transitions, but if we
actually want to use SCS for the VHE code then I see that it will be
required.

Sami -- can you restore -ffixed-x18 and then try the function attribute
as suggested by James, please?

Will

2020-02-11 09:55:54

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On Tue, Feb 11, 2020 at 09:14:52AM +0000, Marc Zyngier wrote:
> On 2020-02-10 18:07, Will Deacon wrote:
> > On Mon, Feb 10, 2020 at 06:03:28PM +0000, Mark Rutland wrote:
> > > On Mon, Feb 10, 2020 at 05:52:15PM +0000, Will Deacon wrote:
> > > > On Mon, Feb 10, 2020 at 05:18:58PM +0000, James Morse wrote:
> > > > > On 28/01/2020 18:49, Sami Tolvanen wrote:
> > > > > > Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
> > > > > > different exception level.
> > > > >
> > > > > Hmmm, there are two things being disabled here.
> > > > >
> > > > > Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
> > > > > shouldn't KVM's C code still treat x18 as a fixed register?
> > > >
> > > > My review of v6 suggested dropping the -ffixed-x18 as well, since it's only
> > > > introduced by SCS (in patch 5) and so isn't required by anything else. Why
> > > > do you think it's needed?
> > >
> > > When EL1 code calls up to hyp, it expects x18 to be preserved across
> > > the
> > > call, so hyp needs to either preserve it explicitly across a
> > > transitions
> > > from/to EL1 or always preserve it.
> >
> > I thought we explicitly saved/restored it across the call after
> > af12376814a5 ("arm64: kvm: stop treating register x18 as caller save").
> > Is
> > that not sufficient?
> >
> > > The latter is easiest since any code used by VHE hyp code will need
> > > x18
> > > saved anyway (ans so any common hyp code needs to).
> >
> > I would personally prefer to split the VHE and non-VHE code so they can
> > be
> > compiled with separate options.
>
> This is going to generate a lot of code duplication (or at least object
> duplication),
> as the two code paths are intricately linked and splitting them to support
> different
> compilation options and/or calling conventions.
>
> I'm not fundamentally opposed to that, but it should come with ways to still
> manage it as a unified code base as much as possible, as ways to discard the
> unused part at runtime (which should become easy to do once we have two
> distinct sets of objects).

Agreed, and I don't want to hold up the SCS patches because of this. I'm
just nervous about the function attribute because I've only ever had
terrible experiences with them. Maybe it will work this time (!)

Will

2020-02-11 10:02:27

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On 2020-02-11 09:55, Will Deacon wrote:
> On Tue, Feb 11, 2020 at 09:14:52AM +0000, Marc Zyngier wrote:
>> On 2020-02-10 18:07, Will Deacon wrote:
>> > On Mon, Feb 10, 2020 at 06:03:28PM +0000, Mark Rutland wrote:
>> > > On Mon, Feb 10, 2020 at 05:52:15PM +0000, Will Deacon wrote:
>> > > > On Mon, Feb 10, 2020 at 05:18:58PM +0000, James Morse wrote:
>> > > > > On 28/01/2020 18:49, Sami Tolvanen wrote:
>> > > > > > Filter out CC_FLAGS_SCS and -ffixed-x18 for code that runs at a
>> > > > > > different exception level.
>> > > > >
>> > > > > Hmmm, there are two things being disabled here.
>> > > > >
>> > > > > Stashing the lr in memory pointed to by VA won't work transparently at EL2 ... but
>> > > > > shouldn't KVM's C code still treat x18 as a fixed register?
>> > > >
>> > > > My review of v6 suggested dropping the -ffixed-x18 as well, since it's only
>> > > > introduced by SCS (in patch 5) and so isn't required by anything else. Why
>> > > > do you think it's needed?
>> > >
>> > > When EL1 code calls up to hyp, it expects x18 to be preserved across
>> > > the
>> > > call, so hyp needs to either preserve it explicitly across a
>> > > transitions
>> > > from/to EL1 or always preserve it.
>> >
>> > I thought we explicitly saved/restored it across the call after
>> > af12376814a5 ("arm64: kvm: stop treating register x18 as caller save").
>> > Is
>> > that not sufficient?
>> >
>> > > The latter is easiest since any code used by VHE hyp code will need
>> > > x18
>> > > saved anyway (ans so any common hyp code needs to).
>> >
>> > I would personally prefer to split the VHE and non-VHE code so they can
>> > be
>> > compiled with separate options.
>>
>> This is going to generate a lot of code duplication (or at least
>> object
>> duplication),
>> as the two code paths are intricately linked and splitting them to
>> support
>> different
>> compilation options and/or calling conventions.
>>
>> I'm not fundamentally opposed to that, but it should come with ways to
>> still
>> manage it as a unified code base as much as possible, as ways to
>> discard the
>> unused part at runtime (which should become easy to do once we have
>> two
>> distinct sets of objects).
>
> Agreed, and I don't want to hold up the SCS patches because of this.
> I'm
> just nervous about the function attribute because I've only ever had
> terrible experiences with them. Maybe it will work this time (!)

I have the same experience chasing missing __hyp_text attributes. Until we
have tooling that picks up on this *at compile time*, we'll have to play
whack-a-mole with them...

M.
--
Jazz is not dead. It just smells funny...

2020-02-11 14:09:56

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v7 00/11] add support for Clang's Shadow Call Stack

Hi Sami,

On 28/01/2020 18:49, Sami Tolvanen wrote:
> This patch series adds support for Clang's Shadow Call Stack
> (SCS) mitigation, which uses a separately allocated shadow stack
> to protect against return address overwrites. More information
> can be found here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> SCS provides better protection against traditional buffer
> overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
> that SCS security guarantees in the kernel differ from the ones
> documented for user space. The kernel must store addresses of
> shadow stacks used by inactive tasks and interrupt handlers in
> memory, which means an attacker capable of reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> SCS is currently supported only on arm64, where the compiler
> requires the x18 register to be reserved for holding the current
> task's shadow stack pointer.

I found I had to add:
| KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))

to drivers/firmware/efi/libstub/Makefile, to get this going.
I don't think there is much point supporting SCS for the EFI stub; it's already isolated
from the rest of the kernel's C code by the __efistub symbol prefix machinery, and trying
to use it would expose us to buggy firmware at a point we can't handle it!

I can send a patch if it's easier for you,


Thanks,

James

2020-02-11 14:11:13

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v7 11/11] arm64: scs: add shadow stacks for SDEI

Hi Sami,

On 28/01/2020 18:49, Sami Tolvanen wrote:
> This change adds per-CPU shadow call stacks for the SDEI handler.
> Similarly to how the kernel stacks are handled, we add separate shadow
> stacks for normal and critical events.

Reviewed-by: James Morse <[email protected]>
Tested-by: James Morse <[email protected]>


> diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
> index eaadf5430baa..dddb7c56518b 100644
> --- a/arch/arm64/kernel/scs.c
> +++ b/arch/arm64/kernel/scs.c

> +static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
> +{
> + unsigned long *p;
> +
> + p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> + VMALLOC_START, VMALLOC_END,
> + GFP_SCS, PAGE_KERNEL,
> + 0, cpu_to_node(cpu),
> + __builtin_return_address(0));

(What makes this arch specific? arm64 has its own calls like this for the regular vmap
stacks because it plays tricks with the alignment. Here the alignment requirement comes
from the core SCS code... Would another architecture implement these
scs_alloc_percpu()/scs_free_percpu() differently?)


Thanks,

James

2020-02-12 17:32:42

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v7 09/11] arm64: disable SCS for hypervisor code

On Tue, Feb 11, 2020 at 1:54 AM Will Deacon <[email protected]> wrote:
> Thanks, I missed that. It's annoying that we'll end up needing /both/
> -ffixed-x18 *and* the save/restore around guest transitions, but if we
> actually want to use SCS for the VHE code then I see that it will be
> required.
>
> Sami -- can you restore -ffixed-x18 and then try the function attribute
> as suggested by James, please?

Sure. Adding __noscs to __hyp_text and not filtering out any of the
flags in the Makefile appears to work. I'll update this in the next
version.

Sami

2020-02-12 17:37:30

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v7 00/11] add support for Clang's Shadow Call Stack

On Tue, Feb 11, 2020 at 5:57 AM James Morse <[email protected]> wrote:
> I found I had to add:
> | KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
>
> to drivers/firmware/efi/libstub/Makefile, to get this going.

Ah, good catch!

> I don't think there is much point supporting SCS for the EFIstub, its already isolated
> from the rest of the kernel's C code by the __efistub symbol prefix machinery, and trying
> to use it would expose us to buggy firmware at a point we can't handle it!

Yes, fully agreed.

> I can send a patch if its easier for you,

It's not a problem, I will include a patch for this in v8.

Sami

2020-02-12 21:00:02

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v7 11/11] arm64: scs: add shadow stacks for SDEI

On Tue, Feb 11, 2020 at 5:57 AM James Morse <[email protected]> wrote:
>
> Hi Sami,
>
> On 28/01/2020 18:49, Sami Tolvanen wrote:
> > This change adds per-CPU shadow call stacks for the SDEI handler.
> > Similarly to how the kernel stacks are handled, we add separate shadow
> > stacks for normal and critical events.
>
> Reviewed-by: James Morse <[email protected]>
> Tested-by: James Morse <[email protected]>

Thank you for taking the time to test this, James!

> > diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
> > index eaadf5430baa..dddb7c56518b 100644
> > --- a/arch/arm64/kernel/scs.c
> > +++ b/arch/arm64/kernel/scs.c
>
> > +static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
> > +{
> > + unsigned long *p;
> > +
> > + p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> > + VMALLOC_START, VMALLOC_END,
> > + GFP_SCS, PAGE_KERNEL,
> > + 0, cpu_to_node(cpu),
> > + __builtin_return_address(0));
>
> (What makes this arch specific? arm64 has its own calls like this for the regular vmap
> stacks because it plays tricks with the alignment. Here the alignment requirement comes
> from the core SCS code... Would another architecture implement these
> scs_alloc_percpu()/scs_free_percpu() differently?)

You are correct, these aren't necessarily specific to arm64. However,
right now, we are not allocating per-CPU shadow stacks anywhere else,
so this was a natural place for the helper functions. Would you prefer
me to move these to kernel/scs.c instead?

Sami

2020-02-14 18:14:02

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v7 11/11] arm64: scs: add shadow stacks for SDEI

Hi Sami,

On 12/02/2020 20:59, Sami Tolvanen wrote:
> On Tue, Feb 11, 2020 at 5:57 AM James Morse <[email protected]> wrote:
>> On 28/01/2020 18:49, Sami Tolvanen wrote:
>>> This change adds per-CPU shadow call stacks for the SDEI handler.
>>> Similarly to how the kernel stacks are handled, we add separate shadow
>>> stacks for normal and critical events.
>>
>> Reviewed-by: James Morse <[email protected]>
>> Tested-by: James Morse <[email protected]>

>>> diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
>>> index eaadf5430baa..dddb7c56518b 100644
>>> --- a/arch/arm64/kernel/scs.c
>>> +++ b/arch/arm64/kernel/scs.c
>>
>>> +static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
>>> +{
>>> + unsigned long *p;
>>> +
>>> + p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
>>> + VMALLOC_START, VMALLOC_END,
>>> + GFP_SCS, PAGE_KERNEL,
>>> + 0, cpu_to_node(cpu),
>>> + __builtin_return_address(0));
>>
>> (What makes this arch specific? arm64 has its own calls like this for the regular vmap
>> stacks because it plays tricks with the alignment. Here the alignment requirement comes
>> from the core SCS code... Would another architecture implement these
>> scs_alloc_percpu()/scs_free_percpu() differently?)
>
> You are correct, these aren't necessarily specific to arm64. However,
> right now, we are not allocating per-CPU shadow stacks anywhere else,
> so this was a natural place for the helper functions.

Fair enough,


> Would you prefer me to move these to kernel/scs.c instead?

I have no preference, as long as they don't get duplicated later!


Thanks,

James

2020-02-19 00:09:29

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 00/12] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by inactive tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer.

With -fsanitize=shadow-call-stack, the compiler injects
instructions into all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
in the kernel stack to alter control flow, such as function
graph tracing, although it may be possible to later change these
features to modify the shadow stack instead. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.
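
For illustration, the arm64 instrumentation is roughly equivalent to
the following prologue and epilogue sequence (a sketch of what the
compiler emits with -fsanitize=shadow-call-stack, not taken verbatim
from any particular Clang version):

  func:
          str     x30, [x18], #8    // push the return address to the shadow stack
          ...                       // function body; x30 may still be spilled to
                                    // the regular stack for the unwinder
          ldr     x30, [x18, #-8]!  // reload the return address before returning
          ret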

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v8:
- Added __noscs to __hyp_text instead of filtering SCS flags from
the entire arch/arm64/kvm/hyp directory.
- Added a patch to filter out -ffixed-x18 and SCS flags from the
EFI stub.

Changes in v7:
- Changed irq_stack_entry/exit to store the shadow stack pointer
in x24 instead of x20 as kernel_entry uses x20-x23 to store
data that can be used later. Updated the comment as well.
- Changed the Makefile in arch/arm64/kvm/hyp to also filter out
-ffixed-x18.
- Changed SHADOW_CALL_STACK to depend on !FUNCTION_GRAPH_TRACER
instead of not selecting HAVE_FUNCTION_GRAPH_TRACER with SCS.
- Removed ifdefs from the EFI wrapper and updated the comment to
explain why we are restoring x18.
- Rebased as Ard's x18 patches that were part of this series have
already been merged.

Changes in v6:
- Updated comment in the EFI RT wrapper to include the
explanation from the commit message.
- Fixed the SHADOW_CALL_STACK_VMAP config option and the
compilation errors in scs_init_irq()
- Updated the comment in entry.S to Mark's suggestion
- Fixed the WARN_ON in scs_init() to trip only when the return
value for cpuhp_setup_state() is < 0.
- Removed ifdefs from the code in arch/arm64/kernel/scs.c and
added separate shadow stacks for the SDEI handler

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved scs_set_magic before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Sami Tolvanen (12):
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
scs: disable when function graph tracing is enabled
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack
arm64: scs: add shadow stacks for SDEI
efi/libstub: disable SCS

Makefile | 6 +
arch/Kconfig | 35 ++++
arch/arm64/Kconfig | 5 +
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/kvm_hyp.h | 2 +-
arch/arm64/include/asm/scs.h | 39 ++++
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/efi-rt-wrapper.S | 11 +-
arch/arm64/kernel/entry.S | 46 ++++-
arch/arm64/kernel/head.S | 9 +
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 114 ++++++++++++
arch/arm64/kernel/sdei.c | 7 +
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/mm/proc.S | 14 ++
drivers/base/node.c | 6 +
drivers/firmware/efi/libstub/Makefile | 3 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 57 ++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 +
kernel/sched/core.c | 2 +
kernel/scs.c | 246 ++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
34 files changed, 662 insertions(+), 7 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: 2b72104b8c12504176fb5fc1442d6e54e31e338b
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:09:29

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 01/12] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 34 ++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 57 ++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
10 files changed, 314 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index aab38cb02b24..69119440b843 100644
--- a/Makefile
+++ b/Makefile
@@ -845,6 +845,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 98de654b79b3..66b34fd0df54 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -526,6 +526,40 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found from
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable reading and writing arbitrary memory may
+ be able to locate them and hijack control flow by modifying shadow
+ stacks that are not currently in use.
+
+config SHADOW_CALL_STACK_VMAP
+ bool "Use virtually mapped shadow call stacks"
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..c5572fd770b0
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index 9e5cbe5eab7b..cbd40460e903 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb4130ced32..c332eb9d4841 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index 60a1295f4384..2bc73d654593 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -454,6 +455,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -837,6 +840,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -896,6 +901,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a9983da4408..7473cd685560 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6036,6 +6037,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..28abed21950c
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize the risk of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline unsigned long *scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(void *s)
+{
+ *scs_magic(s) = SCS_END_MAGIC;
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+ void *s;
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ goto out;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+
+out:
+ if (s)
+ scs_set_magic(s);
+ /* TODO: poison for KASAN, unpoison in scs_free */
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_SCS; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup) < 0);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ scs_set_magic(s);
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static inline void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:09:31

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 04/12] scs: disable when function graph tracing is enabled

The graph tracer hooks returns by modifying frame records on the
(regular) stack, but with SCS the return address is taken from the
shadow stack, and the value in the frame record has no effect. As we
don't currently have a mechanism to determine the corresponding slot
on the shadow stack (and to pass this through the ftrace
infrastructure), for now let's disable SCS when the graph tracer is
enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 66b34fd0df54..4102b8e0eea9 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -535,6 +535,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK

config SHADOW_CALL_STACK
bool "Clang Shadow Call Stack"
+ depends on !FUNCTION_GRAPH_TRACER
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
help
This option enables Clang's Shadow Call Stack, which uses a
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:09:31

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 03/12] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
also prints out the highest shadow stack usage per process.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 5245e992c692..ad74d13f2c0f 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
unsigned long *magic = scs_magic(__scs_base(tsk));
@@ -200,6 +238,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:10:06

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 09/12] arm64: disable SCS for hypervisor code

Disable SCS for code that runs at a different exception level by
adding __noscs to __hyp_text.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
arch/arm64/include/asm/kvm_hyp.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index a3a6a2ba9a63..0f0603f55ea0 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -13,7 +13,7 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>

-#define __hyp_text __section(.hyp.text) notrace
+#define __hyp_text __section(.hyp.text) notrace __noscs

#define read_sysreg_elx(r,nvh,vh) \
({ \
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:10:15

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 10/12] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 37 +++++++++++++++++++++++++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 32 ++++++++++++++++++++--
arch/arm64/kernel/head.S | 9 +++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 40 ++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
11 files changed, 136 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0b30e884e088..eae76686be77 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -65,6 +65,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -1022,6 +1023,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..c50d2b0c6c5f
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void) {}
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..8c73764b9ed2 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fc6488660f64..08fafc4da2cf 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a5bdce8af65b..d485dc5cd196 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 9461d812ae27..4b18c3bbdea5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -177,6 +177,10 @@ alternative_cb_end

apply_ssbd 1, x22, x23

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -278,6 +282,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -383,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -400,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x18, x24
+#endif
.endm

/* GPRs used by entry code */
@@ -895,6 +918,11 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
ENDPROC(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..ca561de903d4 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -424,6 +425,10 @@ __primary_switched:
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -731,6 +736,10 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 00626057a384..9151616c354c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -514,6 +515,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
uao_thread_switch(next);
ptrauth_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..eaadf5430baa
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d4ed9a19d8fe..f2cb344f998c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -46,6 +46,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -358,6 +359,9 @@ void cpu_die(void)
{
unsigned int cpu = smp_processor_id();

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:10:24

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 02/12] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 98a31bafc8a2..874a8b428438 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 462f6873905a..0a6f395abc68 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 28abed21950c..5245e992c692 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -12,6 +12,7 @@
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -89,6 +90,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -135,6 +141,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -153,6 +164,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -162,6 +179,8 @@ int scs_prepare(struct task_struct *tsk, int node)
return -ENOMEM;

task_set_scs(tsk, s);
+ scs_account(tsk, 1);
+
return 0;
}

@@ -182,6 +201,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
task_set_scs(tsk, NULL);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3c4eb750a199..1381b9d84e4c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5340,6 +5340,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5362,6 +5365,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 78d53378db99..d0650391c8c1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:10:48

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 08/12] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:10:48

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 06/12] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index aafed6902411..7d37e3c70ff5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -56,6 +56,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
SYM_FUNC_START(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -80,6 +82,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
SYM_FUNC_END(cpu_do_suspend)

@@ -96,6 +103,13 @@ SYM_FUNC_START(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:11:24

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 07/12] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18, restore the register before jumping back
to potentially SCS instrumented code. This is safe, because the wrapper
is called with preemption disabled and a separate shadow stack is used
for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..6ca6c0dc11a1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:11:24

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 11/12] arm64: scs: add shadow stacks for SDEI

This change adds per-CPU shadow call stacks for the SDEI handler.
Similarly to how the kernel stacks are handled, we add separate shadow
stacks for normal and critical events.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: James Morse <[email protected]>
Tested-by: James Morse <[email protected]>
---
arch/arm64/include/asm/scs.h | 2 +
arch/arm64/kernel/entry.S | 14 ++++-
arch/arm64/kernel/scs.c | 106 +++++++++++++++++++++++++++++------
arch/arm64/kernel/sdei.c | 7 +++
4 files changed, 112 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index c50d2b0c6c5f..8e327e14bc15 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,6 +9,7 @@
#ifdef CONFIG_SHADOW_CALL_STACK

extern void scs_init_irq(void);
+extern int scs_init_sdei(void);

static __always_inline void scs_save(struct task_struct *tsk)
{
@@ -27,6 +28,7 @@ static inline void scs_overflow_check(struct task_struct *tsk)
#else /* CONFIG_SHADOW_CALL_STACK */

static inline void scs_init_irq(void) {}
+static inline int scs_init_sdei(void) { return 0; }
static inline void scs_save(struct task_struct *tsk) {}
static inline void scs_overflow_check(struct task_struct *tsk) {}

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4b18c3bbdea5..2e2ce1b9ebf5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1050,13 +1050,16 @@ ENTRY(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1066,6 +1069,15 @@ ENTRY(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
+ b 4f
+3: ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index eaadf5430baa..dddb7c56518b 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -10,31 +10,105 @@
#include <asm/pgtable.h>
#include <asm/scs.h>

-DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#define DECLARE_SCS(name) \
+ DECLARE_PER_CPU(unsigned long *, name ## _ptr); \
+ DECLARE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

-#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
-DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
- __aligned(SCS_SIZE);
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr)
+#else
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr); \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+ __aligned(SCS_SIZE)
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+DECLARE_SCS(irq_shadow_call_stack);
+DECLARE_SCS(sdei_shadow_call_stack_normal);
+DECLARE_SCS(sdei_shadow_call_stack_critical);
+
+DEFINE_SCS(irq_shadow_call_stack);
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
#endif

+static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ if (!p)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = p;
+
+ return 0;
+}
+
+static void scs_free_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p = per_cpu(*ptr, cpu);
+
+ if (p) {
+ per_cpu(*ptr, cpu) = NULL;
+ vfree(p);
+ }
+}
+
+static void scs_free_sdei(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ scs_free_percpu(&sdei_shadow_call_stack_normal_ptr, cpu);
+ scs_free_percpu(&sdei_shadow_call_stack_critical_ptr, cpu);
+ }
+}
+
void scs_init_irq(void)
{
int cpu;

for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
- unsigned long *p;
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP))
+ WARN_ON(scs_alloc_percpu(&irq_shadow_call_stack_ptr,
+ cpu));
+ else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+ }
+}

- p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
- VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL,
- 0, cpu_to_node(cpu),
- __builtin_return_address(0));
+int scs_init_sdei(void)
+{
+ int cpu;

- per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
-#else
- per_cpu(irq_shadow_call_stack_ptr, cpu) =
- per_cpu(irq_shadow_call_stack, cpu);
-#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP)) {
+ if (scs_alloc_percpu(
+ &sdei_shadow_call_stack_normal_ptr, cpu) ||
+ scs_alloc_percpu(
+ &sdei_shadow_call_stack_critical_ptr, cpu)) {
+ scs_free_sdei();
+ return -ENOMEM;
+ }
+ } else {
+ per_cpu(sdei_shadow_call_stack_normal_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_normal, cpu);
+ per_cpu(sdei_shadow_call_stack_critical_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_critical, cpu);
+ }
}
+
+ return 0;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d6259dac62b6..2854b9f7760a 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -13,6 +13,7 @@
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
@@ -162,6 +163,12 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}

+ if (scs_init_sdei()) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ free_sdei_stacks();
+ return 0;
+ }
+
sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:12:13

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 12/12] efi/libstub: disable SCS

Disable SCS for the EFI stub and allow x18 to be used.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
drivers/firmware/efi/libstub/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 98a81576213d..dff9fa5a3f1c 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -30,6 +30,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
$(call cc-option,-fno-stack-protector) \
-D__DISABLE_EXPORTS

+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:12:13

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v8 05/12] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.
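
For an out-of-tree module, this would typically mean adding the flag in
the module's kbuild makefile, e.g. (an illustrative sketch only, not
part of this patch):

  ifdef CONFIG_SHADOW_CALL_STACK
  ccflags-y += -ffixed-x18
  endif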

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index dca1a97751ab..ab26b448faa9 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -65,6 +65,10 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.25.0.265.gbab2e86ba0-goog

2020-02-19 00:59:41

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v8 09/12] arm64: disable SCS for hypervisor code

On Tue, Feb 18, 2020 at 04:08:14PM -0800, Sami Tolvanen wrote:
> Disable SCS for code that runs at a different exception level by
> adding __noscs to __hyp_text.
>
> Suggested-by: James Morse <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>

Reviewed-by: Kees Cook <[email protected]>

-Kees

> ---
> arch/arm64/include/asm/kvm_hyp.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index a3a6a2ba9a63..0f0603f55ea0 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -13,7 +13,7 @@
> #include <asm/kvm_mmu.h>
> #include <asm/sysreg.h>
>
> -#define __hyp_text __section(.hyp.text) notrace
> +#define __hyp_text __section(.hyp.text) notrace __noscs
>
> #define read_sysreg_elx(r,nvh,vh) \
> ({ \
> --
> 2.25.0.265.gbab2e86ba0-goog
>

--
Kees Cook

2020-02-19 01:01:28

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v8 12/12] efi/libstub: disable SCS

On Tue, Feb 18, 2020 at 04:08:17PM -0800, Sami Tolvanen wrote:
> Disable SCS for the EFI stub and allow x18 to be used.
>
> Suggested-by: James Morse <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>

Reviewed-by: Kees Cook <[email protected]>

-Kees

> ---
> drivers/firmware/efi/libstub/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 98a81576213d..dff9fa5a3f1c 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -30,6 +30,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
> $(call cc-option,-fno-stack-protector) \
> -D__DISABLE_EXPORTS
>
> +# remove SCS flags from all objects in this directory
> +KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
> +
> GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> --
> 2.25.0.265.gbab2e86ba0-goog
>

--
Kees Cook

2020-02-19 04:22:29

by Randy Dunlap

[permalink] [raw]
Subject: Re: [PATCH v8 01/12] add support for Clang's Shadow Call Stack (SCS)

Hi Sami,

a couple of minor tweaks:

On 2/18/20 4:08 PM, Sami Tolvanen wrote:
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 98de654b79b3..66b34fd0df54 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -526,6 +526,40 @@ config STACKPROTECTOR_STRONG
> about 20% of all kernel functions, which increases the kernel code
> size by about 2%.
>
> +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> + bool
> + help
> + An architecture should select this if it supports Clang's Shadow
> + Call Stack, has asm/scs.h, and implements runtime support for shadow
> + stack switching.
> +
> +config SHADOW_CALL_STACK
> + bool "Clang Shadow Call Stack"
> + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> + help
> + This option enables Clang's Shadow Call Stack, which uses a
> + shadow stack to protect function return addresses from being
> + overwritten by an attacker. More information can be found from

found in

> + Clang's documentation:
> +
> + https://clang.llvm.org/docs/ShadowCallStack.html
> +
> + Note that security guarantees in the kernel differ from the ones
> + documented for user space. The kernel must store addresses of shadow
> + stacks used by other tasks and interrupt handlers in memory, which
> + means an attacker capable reading and writing arbitrary memory may

capable of

> + be able to locate them and hijack control flow by modifying shadow
> + stacks that are not currently in use.
> +
> +config SHADOW_CALL_STACK_VMAP
> + bool "Use virtually mapped shadow call stacks"
> + depends on SHADOW_CALL_STACK
> + help
> + Use virtually mapped shadow call stacks. Selecting this option
> + provides better stack exhaustion protection, but increases per-thread
> + memory consumption as a full page is allocated for each shadow stack.
> +
> +
> config HAVE_ARCH_WITHIN_STACK_FRAMES
> bool
> help


thanks.
--
~Randy

2020-02-19 07:42:30

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v8 12/12] efi/libstub: disable SCS

On Wed, 19 Feb 2020 at 01:09, Sami Tolvanen <[email protected]> wrote:
>
> Disable SCS for the EFI stub and allow x18 to be used.
>
> Suggested-by: James Morse <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>
> ---
> drivers/firmware/efi/libstub/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 98a81576213d..dff9fa5a3f1c 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -30,6 +30,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
> $(call cc-option,-fno-stack-protector) \
> -D__DISABLE_EXPORTS
>
> +# remove SCS flags from all objects in this directory
> +KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
> +

I don't see why you'd need to remove -ffixed-x18 again here. Not using
x18 anywhere in the kernel is a much more maintainable approach.

In fact, now that I think of it, the EFI AArch64 platform binding
forbids the use of x18, so it would be better to add the -ffixed-x18
unconditionally for arm64 (even though the reason it forbids it is to
ensure compatibility with an OS using it as a platform register, and
so nothing is actually broken atm).
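
(Something along these lines in the libstub Makefile would express
that, as a purely illustrative sketch of the unconditional approach
rather than a concrete patch:

  cflags-$(CONFIG_ARM64)	+= -ffixed-x18
)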

> GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> --
> 2.25.0.265.gbab2e86ba0-goog
>

2020-02-19 07:50:45

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v8 09/12] arm64: disable SCS for hypervisor code

On Tue, 18 Feb 2020 16:08:14 -0800
Sami Tolvanen <[email protected]> wrote:

> Disable SCS for code that runs at a different exception level by
> adding __noscs to __hyp_text.
>
> Suggested-by: James Morse <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>

Acked-by: Marc Zyngier <[email protected]>

M.
--
Jazz is not dead. It just smells funny...

2020-02-19 11:35:49

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v8 04/12] scs: disable when function graph tracing is enabled

On Tue, Feb 18, 2020 at 04:08:09PM -0800, Sami Tolvanen wrote:
> The graph tracer hooks returns by modifying frame records on the
> (regular) stack, but with SCS the return address is taken from the
> shadow stack, and the value in the frame record has no effect. As we
> don't currently have a mechanism to determine the corresponding slot
> on the shadow stack (and to pass this through the ftrace
> infrastructure), for now let's disable SCS when the graph tracer is
> enabled.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> arch/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 66b34fd0df54..4102b8e0eea9 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -535,6 +535,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>
> config SHADOW_CALL_STACK
> bool "Clang Shadow Call Stack"
> + depends on !FUNCTION_GRAPH_TRACER

Fangrui Song has implemented `-fpatchable-function-entry` in LLVM (for
10.x onwards), so we can support this when DYNAMIC_FTRACE_WITH_REGS is
selected.

This can be:

depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER

... and we can update the commit message to something like:

| With SCS the return address is taken from the shadow stack and the
| value in the frame record has no effect. The mcount based graph tracer
| hooks returns by modifying frame records on the (regular) stack, and
| thus is not compatible. The patchable-function-entry graph tracer
| used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved
| to the shadow stack, and is compatible.
|
| Modifying the mcount based graph tracer to work with SCS would require
| a mechanism to determine the corresponding slot on the shadow stack
| (and to pass this through the ftrace infrastructure), and we expect
| that everyone will eventually move to the patchable-function-entry
| based graph tracer anyway, so for now let's disable SCS when the
| mcount-based graph tracer is enabled.
|
| SCS and patchable-function-entry are both supported from LLVM 10.x.

Assuming you're happy with that:

Reviewed-by: Mark Rutland <[email protected]>

Thanks,
Mark.

2020-02-19 17:26:35

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v8 01/12] add support for Clang's Shadow Call Stack (SCS)

On Tue, Feb 18, 2020 at 8:20 PM Randy Dunlap <[email protected]> wrote:
>
> Hi Sami,
>
> a couple of minor tweaks:
>
> On 2/18/20 4:08 PM, Sami Tolvanen wrote:
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index 98de654b79b3..66b34fd0df54 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -526,6 +526,40 @@ config STACKPROTECTOR_STRONG
> > about 20% of all kernel functions, which increases the kernel code
> > size by about 2%.
> >
> > +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> > + bool
> > + help
> > + An architecture should select this if it supports Clang's Shadow
> > + Call Stack, has asm/scs.h, and implements runtime support for shadow
> > + stack switching.
> > +
> > +config SHADOW_CALL_STACK
> > + bool "Clang Shadow Call Stack"
> > + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> > + help
> > + This option enables Clang's Shadow Call Stack, which uses a
> > + shadow stack to protect function return addresses from being
> > + overwritten by an attacker. More information can be found from
>
> found in
>
> > + Clang's documentation:
> > +
> > + https://clang.llvm.org/docs/ShadowCallStack.html
> > +
> > + Note that security guarantees in the kernel differ from the ones
> > + documented for user space. The kernel must store addresses of shadow
> > + stacks used by other tasks and interrupt handlers in memory, which
> > + means an attacker capable reading and writing arbitrary memory may
>
> capable of

Thanks, Randy! I'll fix these in the next version.

Sami

2020-02-19 18:01:54

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v8 04/12] scs: disable when function graph tracing is enabled

On Wed, Feb 19, 2020 at 3:34 AM Mark Rutland <[email protected]> wrote:
> Fangrui Song has implemented `-fpatchable-function-entry` in LLVM (for
> 10.x onwards), so we can support this when DYNAMIC_FTRACE_WITH_REGS is
> selected.
>
> This can be:
>
> depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
>
> ... and we can update the commit message to something like:
>
> | With SCS the return address is taken from the shadow stack and the
> | value in the frame record has no effect. The mcount based graph tracer
> | hooks returns by modifying frame records on the (regular) stack, and
> | thus is not compatible. The patchable-function-entry graph tracer
> | used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved
> | to the shadow stack, and is compatible.
> |
> | Modifying the mcount based graph tracer to work with SCS would require
> | a mechanism to determine the corresponding slot on the shadow stack
> | (and to pass this through the ftrace infrastructure), and we expect
> | that everyone will eventually move to the patchable-function-entry
> | based graph tracer anyway, so for now let's disable SCS when the
> | mcount-based graph tracer is enabled.
> |
> | SCS and patchable-function-entry are both supported from LLVM 10.x.
>
> Assuming you're happy with that:
>
> Reviewed-by: Mark Rutland <[email protected]>

Great, thanks for pointing that out! This looks good to me; I'll use this in v9.

Sami

2020-02-19 18:28:06

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v8 12/12] efi/libstub: disable SCS

On Tue, Feb 18, 2020 at 11:41 PM Ard Biesheuvel
<[email protected]> wrote:
>
> On Wed, 19 Feb 2020 at 01:09, Sami Tolvanen <[email protected]> wrote:
> >
> > +# remove SCS flags from all objects in this directory
> > +KBUILD_CFLAGS := $(filter-out -ffixed-x18 $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
> > +
>
> I don't see why you'd need to remove -ffixed-x18 again here. Not using
> x18 anywhere in the kernel is a much more maintainable approach.

Sure, I will drop -ffixed-x18 from here in v9. Thanks,

Sami

2020-02-19 18:39:51

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v8 00/12] add support for Clang's Shadow Call Stack

Hi Sami,

(CC: +Marc)

On 19/02/2020 00:08, Sami Tolvanen wrote:
> This patch series adds support for Clang's Shadow Call Stack
> (SCS) mitigation, which uses a separately allocated shadow stack
> to protect against return address overwrites.

I took this for a spin on some real hardware. cpu-idle, kexec, hibernate etc. all work
great... but starting a KVM guest causes the CPU to get stuck in EL2.

With CONFIG_SHADOW_CALL_STACK disabled, this doesn't happen ... so it's something about the
feature being enabled.


I'm using clang-9 from debian bullseye/sid. (I tried to build tip of tree ... that doesn't
go so well on arm64)

KVM takes an instruction abort from EL2 to EL2, because some of the code it runs is not
mapped at EL2:

| ffffa00011588308 <__kvm_tlb_flush_local_vmid>:
| ffffa00011588308: d10103ff sub sp, sp, #0x40
| ffffa0001158830c: f90013f3 str x19, [sp, #32]
| ffffa00011588310: a9037bfd stp x29, x30, [sp, #48]
| ffffa00011588314: 9100c3fd add x29, sp, #0x30
| ffffa00011588318: 97ae18bf bl ffffa0001010e614 <__kern_hyp_va>

INSTRUCTION ABORT!

| ffffa0001158831c: f9400000 ldr x0, [x0]
| ffffa00011588320: 97ae18bd bl ffffa0001010e614 <__kern_hyp_va>
| ffffa00011588324: aa0003f3 mov x19, x0
| ffffa00011588328: 97ae18c1 bl ffffa0001010e62c <has_vhe>


__kern_hyp_va() is a static inline that is patched at boot, wherever it appears, with the
EL2 ASLR values; it converts a kernel linear-map address to its EL2 KVM alias:

| ffffa0001010dc5c <__kern_hyp_va>:
| ffffa0001010dc5c: 92400000 and x0, x0, #0x1
| ffffa0001010dc60: 93c00400 ror x0, x0, #1
| ffffa0001010dc64: 91000000 add x0, x0, #0x0
| ffffa0001010dc68: 91400000 add x0, x0, #0x0, lsl #12
| ffffa0001010dc6c: 93c0fc00 ror x0, x0, #63
| ffffa0001010dc70: d65f03c0 ret


The problem here is where __kern_hyp_va() is. It's outside the __hyp_text section:
| morse@eglon:~/kernel/linux-pigs$ nm -s vmlinux | grep hyp_text
| ffffa0001158b800 T __hyp_text_end
| ffffa000115838a0 T __hyp_text_start


If I disable CONFIG_SHADOW_CALL_STACK in Kconfig, I get:
| ffffa00011527fe0 <__kvm_tlb_flush_local_vmid>:
| ffffa00011527fe0: d100c3ff sub sp, sp, #0x30
| ffffa00011527fe4: a9027bfd stp x29, x30, [sp, #32]
| ffffa00011527fe8: 910083fd add x29, sp, #0x20
| ffffa00011527fec: 92400000 and x0, x0, #0x1
| ffffa00011527ff0: 93c00400 ror x0, x0, #1
| ffffa00011527ff4: 91000000 add x0, x0, #0x0
| ffffa00011527ff8: 91400000 add x0, x0, #0x0, lsl #12
| ffffa00011527ffc: 93c0fc00 ror x0, x0, #63
| ffffa00011528000: f9400000 ldr x0, [x0]
| ffffa00011528004: 910023e1 add x1, sp, #0x8
| ffffa00011528008: 92400000 and x0, x0, #0x1
| ffffa0001152800c: 93c00400 ror x0, x0, #1
| ffffa00011528010: 91000000 add x0, x0, #0x0
| ffffa00011528014: 91400000 add x0, x0, #0x0, lsl #12
| ffffa00011528018: 93c0fc00 ror x0, x0, #63
| ffffa0001152801c: 97ffff78 bl ffffa00011527dfc <__tlb_switch_>
| ffffa00011528020: d508871f tlbi vmalle1
| ffffa00011528024: d503201f nop


This looks like reserving x18 is causing Clang not to inline the __kern_hyp_va() calls,
losing the vitally important section information. (I can see why the compiler thinks this
is fair.)

Is this a known, er, thing, with clang-9?

From eyeballing the disassembly, __always_inline on __kern_hyp_va() is enough of a hint to
stop this ... with this configuration of clang-9. But KVM still doesn't work, so it isn't
the only inlining decision KVM relies on that is changed by SCS.

I suspect repainting all KVM's 'inline' with __always_inline will fix it. (yuck!) I'll try
tomorrow.

I don't think keeping the compiler flags as they are today for KVM is the right thing to
do; it could lead to x18 getting corrupted in the shared vhe/non-vhe code. Splitting
that code up would lead to duplication.

(hopefully objtool will be able to catch these at build time)


Thanks,

James

> SCS is currently supported only on arm64, where the compiler
> requires the x18 register to be reserved for holding the current
> task's shadow stack pointer.

> Changes in v8:
> - Added __noscs to __hyp_text instead of filtering SCS flags from
> the entire arch/arm64/kvm/hyp directory

2020-02-19 18:54:04

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v8 00/12] add support for Clang's Shadow Call Stack

On Wed, 19 Feb 2020 at 19:38, James Morse <[email protected]> wrote:
>
> Hi Sami,
>
> (CC: +Marc)
>
> On 19/02/2020 00:08, Sami Tolvanen wrote:
> > This patch series adds support for Clang's Shadow Call Stack
> > (SCS) mitigation, which uses a separately allocated shadow stack
> > to protect against return address overwrites.
>
> I took this for a spin on some real hardware. cpu-idle, kexec hibernate etc all work
> great... but starting a KVM guest causes the CPU to get stuck in EL2.
>
> With CONFIG_SHADOW_CALL_STACK disabled, this doesn't happen ... so its something about the
> feature being enabled.
>
>
> I'm using clang-9 from debian bullseye/sid. (I tried to build tip of tree ... that doesn't
> go so well on arm64)
>
> KVM takes an instruction abort from EL2 to EL2, because some of the code it runs is not
> mapped at EL2:
>
> | ffffa00011588308 <__kvm_tlb_flush_local_vmid>:
> | ffffa00011588308: d10103ff sub sp, sp, #0x40
> | ffffa0001158830c: f90013f3 str x19, [sp, #32]
> | ffffa00011588310: a9037bfd stp x29, x30, [sp, #48]
> | ffffa00011588314: 9100c3fd add x29, sp, #0x30
> | ffffa00011588318: 97ae18bf bl ffffa0001010e614 <__kern_hyp_va>
>
> INSTRUCTION ABORT!
>
> | ffffa0001158831c: f9400000 ldr x0, [x0]
> | ffffa00011588320: 97ae18bd bl ffffa0001010e614 <__kern_hyp_va>
> | ffffa00011588324: aa0003f3 mov x19, x0
> | ffffa00011588328: 97ae18c1 bl ffffa0001010e62c <has_vhe>
>
>
> __kern_hyp_va() is static-inline which is patched wherever it appears at boot with the EL2
> ASLR values, it converts a kernel linear-map address to its EL2 KVM alias:
>
> | ffffa0001010dc5c <__kern_hyp_va>:
> | ffffa0001010dc5c: 92400000 and x0, x0, #0x1
> | ffffa0001010dc60: 93c00400 ror x0, x0, #1
> | ffffa0001010dc64: 91000000 add x0, x0, #0x0
> | ffffa0001010dc68: 91400000 add x0, x0, #0x0, lsl #12
> | ffffa0001010dc6c: 93c0fc00 ror x0, x0, #63
> | ffffa0001010dc70: d65f03c0 ret
>
>
> The problem here is where __kern_hyp_va() is. Its outside the __hyp_text section:
> | morse@eglon:~/kernel/linux-pigs$ nm -s vmlinux | grep hyp_text
> | ffffa0001158b800 T __hyp_text_end
> | ffffa000115838a0 T __hyp_text_start
>
>
> If I disable CONFIG_SHADOW_CALL_STACK in Kconfig, I get:
> | ffffa00011527fe0 <__kvm_tlb_flush_local_vmid>:
> | ffffa00011527fe0: d100c3ff sub sp, sp, #0x30
> | ffffa00011527fe4: a9027bfd stp x29, x30, [sp, #32]
> | ffffa00011527fe8: 910083fd add x29, sp, #0x20
> | ffffa00011527fec: 92400000 and x0, x0, #0x1
> | ffffa00011527ff0: 93c00400 ror x0, x0, #1
> | ffffa00011527ff4: 91000000 add x0, x0, #0x0
> | ffffa00011527ff8: 91400000 add x0, x0, #0x0, lsl #12
> | ffffa00011527ffc: 93c0fc00 ror x0, x0, #63
> | ffffa00011528000: f9400000 ldr x0, [x0]
> | ffffa00011528004: 910023e1 add x1, sp, #0x8
> | ffffa00011528008: 92400000 and x0, x0, #0x1
> | ffffa0001152800c: 93c00400 ror x0, x0, #1
> | ffffa00011528010: 91000000 add x0, x0, #0x0
> | ffffa00011528014: 91400000 add x0, x0, #0x0, lsl #12
> | ffffa00011528018: 93c0fc00 ror x0, x0, #63
> | ffffa0001152801c: 97ffff78 bl ffffa00011527dfc <__tlb_switch_>
> | ffffa00011528020: d508871f tlbi vmalle1
> | ffffa00011528024: d503201f nop
>
>
> This looks like reserving x18 is causing Clang to not-inline the __kern_hyp_va() calls,
> losing the vitally important section information. (I can see why the compiler thinks this
> is fair)
>
> Is this a known, er, thing, with clang-9?
>
> From eyeballing the disassembly __always_inline on __kern_hyp_va() is enough of a hint to
> stop this, ... with this configuration of clang-9. But KVM still doesn't work, so it isn't
> the only inlining decision KVM relies on that is changed by SCS.
>
> I suspect repainting all KVM's 'inline' with __always_inline will fix it. (yuck!) I'll try
> tomorrow.
>

If we are relying on the inlining for correctness, these should have
been __always_inline to begin with, and yuckness aside, I don't think
there's anything wrong with that.


> I don't think keeping the compiler-flags as they are today for KVM is the right thing to
> do, it could lead to x18 getting corrupted with the shared vhe/non-vhe code. Splitting
> that code up would lead to duplication.
>
> (hopefully objtool will be able to catch these at build time)
>

I don't see why we should selectively en/disable the reservation of
x18 (as I argued in the context of the EFI libstub patch as well).
Just reserving it everywhere shouldn't hurt performance, and removes
the need to prove that we reserved it in all the right places.

2020-02-19 20:14:04

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v8 00/12] add support for Clang's Shadow Call Stack

On Wed, Feb 19, 2020 at 10:38 AM James Morse <[email protected]> wrote:
> This looks like reserving x18 is causing Clang to not-inline the __kern_hyp_va() calls,
> losing the vitally important section information. (I can see why the compiler thinks this
> is fair)

Thanks for catching this. This doesn't appear to be caused by
reserving x18; it looks like SCS itself is causing clang to avoid
inlining these. If I add __noscs to __kern_hyp_va(), clang inlines the
function again. __always_inline also works, as you pointed out.

> Is this a known, er, thing, with clang-9?

I can reproduce this with ToT clang as well.

> I suspect repainting all KVM's 'inline' with __always_inline will fix it. (yuck!) I'll try
> tomorrow.

I think switching to __always_inline is the correct solution here.
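
For illustration, a minimal sketch of the annotation being settled on here; the body is
reduced to a stub, since the real __kern_hyp_va() applies the boot-patched mask/offset
sequence via inline asm:

	/*
	 * Forcing inlining keeps every use of this helper inside its caller,
	 * so no out-of-line copy can land outside the __hyp_text section.
	 * Marking it __noscs instead also restores inlining with SCS enabled,
	 * as noted above, but __always_inline states the actual requirement.
	 */
	static __always_inline unsigned long __kern_hyp_va(unsigned long v)
	{
		/* boot-patched and/ror/add/add/ror sequence elided */
		return v;
	}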

Sami

2020-02-20 09:56:56

by Marc Zyngier

[permalink] [raw]
Subject: Re: [PATCH v8 00/12] add support for Clang's Shadow Call Stack

On 2020-02-19 18:53, Ard Biesheuvel wrote:
> On Wed, 19 Feb 2020 at 19:38, James Morse <[email protected]> wrote:
>>
>> Hi Sami,
>>
>> (CC: +Marc)
>>
>> On 19/02/2020 00:08, Sami Tolvanen wrote:
>> > This patch series adds support for Clang's Shadow Call Stack
>> > (SCS) mitigation, which uses a separately allocated shadow stack
>> > to protect against return address overwrites.
>>
>> I took this for a spin on some real hardware. cpu-idle, kexec
>> hibernate etc all work
>> great... but starting a KVM guest causes the CPU to get stuck in EL2.
>>
>> With CONFIG_SHADOW_CALL_STACK disabled, this doesn't happen ... so its
>> something about the
>> feature being enabled.
>>
>>
>> I'm using clang-9 from debian bullseye/sid. (I tried to build tip of
>> tree ... that doesn't
>> go so well on arm64)
>>
>> KVM takes an instruction abort from EL2 to EL2, because some of the
>> code it runs is not
>> mapped at EL2:
>>
>> | ffffa00011588308 <__kvm_tlb_flush_local_vmid>:
>> | ffffa00011588308: d10103ff sub sp, sp, #0x40
>> | ffffa0001158830c: f90013f3 str x19, [sp, #32]
>> | ffffa00011588310: a9037bfd stp x29, x30, [sp, #48]
>> | ffffa00011588314: 9100c3fd add x29, sp, #0x30
>> | ffffa00011588318: 97ae18bf bl ffffa0001010e614
>> <__kern_hyp_va>
>>
>> INSTRUCTION ABORT!
>>
>> | ffffa0001158831c: f9400000 ldr x0, [x0]
>> | ffffa00011588320: 97ae18bd bl ffffa0001010e614
>> <__kern_hyp_va>
>> | ffffa00011588324: aa0003f3 mov x19, x0
>> | ffffa00011588328: 97ae18c1 bl ffffa0001010e62c
>> <has_vhe>
>>
>>
>> __kern_hyp_va() is static-inline which is patched wherever it appears
>> at boot with the EL2
>> ASLR values, it converts a kernel linear-map address to its EL2 KVM
>> alias:
>>
>> | ffffa0001010dc5c <__kern_hyp_va>:
>> | ffffa0001010dc5c: 92400000 and x0, x0, #0x1
>> | ffffa0001010dc60: 93c00400 ror x0, x0, #1
>> | ffffa0001010dc64: 91000000 add x0, x0, #0x0
>> | ffffa0001010dc68: 91400000 add x0, x0, #0x0, lsl
>> #12
>> | ffffa0001010dc6c: 93c0fc00 ror x0, x0, #63
>> | ffffa0001010dc70: d65f03c0 ret
>>
>>
>> The problem here is where __kern_hyp_va() is. Its outside the
>> __hyp_text section:
>> | morse@eglon:~/kernel/linux-pigs$ nm -s vmlinux | grep hyp_text
>> | ffffa0001158b800 T __hyp_text_end
>> | ffffa000115838a0 T __hyp_text_start
>>
>>
>> If I disable CONFIG_SHADOW_CALL_STACK in Kconfig, I get:
>> | ffffa00011527fe0 <__kvm_tlb_flush_local_vmid>:
>> | ffffa00011527fe0: d100c3ff sub sp, sp, #0x30
>> | ffffa00011527fe4: a9027bfd stp x29, x30, [sp, #32]
>> | ffffa00011527fe8: 910083fd add x29, sp, #0x20
>> | ffffa00011527fec: 92400000 and x0, x0, #0x1
>> | ffffa00011527ff0: 93c00400 ror x0, x0, #1
>> | ffffa00011527ff4: 91000000 add x0, x0, #0x0
>> | ffffa00011527ff8: 91400000 add x0, x0, #0x0, lsl
>> #12
>> | ffffa00011527ffc: 93c0fc00 ror x0, x0, #63
>> | ffffa00011528000: f9400000 ldr x0, [x0]
>> | ffffa00011528004: 910023e1 add x1, sp, #0x8
>> | ffffa00011528008: 92400000 and x0, x0, #0x1
>> | ffffa0001152800c: 93c00400 ror x0, x0, #1
>> | ffffa00011528010: 91000000 add x0, x0, #0x0
>> | ffffa00011528014: 91400000 add x0, x0, #0x0, lsl
>> #12
>> | ffffa00011528018: 93c0fc00 ror x0, x0, #63
>> | ffffa0001152801c: 97ffff78 bl ffffa00011527dfc
>> <__tlb_switch_>
>> | ffffa00011528020: d508871f tlbi vmalle1
>> | ffffa00011528024: d503201f nop
>>
>>
>> This looks like reserving x18 is causing Clang to not-inline the
>> __kern_hyp_va() calls,
>> losing the vitally important section information. (I can see why the
>> compiler thinks this
>> is fair)
>>
>> Is this a known, er, thing, with clang-9?
>>
>> From eyeballing the disassembly __always_inline on __kern_hyp_va() is
>> enough of a hint to
>> stop this, ... with this configuration of clang-9. But KVM still
>> doesn't work, so it isn't
>> the only inlining decision KVM relies on that is changed by SCS.
>>
>> I suspect repainting all KVM's 'inline' with __always_inline will fix
>> it. (yuck!) I'll try
>> tomorrow.
>>
>
> If we are relying on the inlining for correctness, these should have
> been __always_inline to begin with, and yuckness aside, I don't think
> there's anything wrong with that.

Agreed. Not having __always_inline is definitely an oversight, and we
should fix it ASAP (hell knows what another compiler could produce...).
And the whole EL2 aliasing is utter yuck already; this isn't going to
make things much worse...

I can queue something today for __kern_hyp_va(), but I'd like to make
sure there aren't other silly mistakes like this one somewhere...

>> I don't think keeping the compiler-flags as they are today for KVM is
>> the right thing to
>> do, it could lead to x18 getting corrupted with the shared vhe/non-vhe
>> code. Splitting
>> that code up would lead to duplication.
>>
>> (hopefully objtool will be able to catch these at build time)
>>
>
> I don't see why we should selectively en/disable the reservation of
> x18 (as I argued in the context of the EFI libstub patch as well).
> Just reserving it everywhere shouldn't hurt performance, and removes
> the need to prove that we reserved it in all the right places.

I'd certainly like to keep things simple if we can.

M.
--
Jazz is not dead. It just smells funny...

2020-02-25 17:47:29

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 00/12] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by inactive tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer.

With -fsanitize=shadow-call-stack, the compiler injects
instructions into all non-leaf C functions to store the return
address on the shadow stack and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
in the kernel stack to alter control flow. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.
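
For reference, a hand-written sketch (not compiler output from this
series) of roughly what that instrumentation looks like on arm64;
the function and callee names are made up, and x18 is assumed to
hold the current shadow stack pointer:

  foo:
          str     x30, [x18], #8          // push the return address to the shadow stack
          stp     x29, x30, [sp, #-16]!   // the normal frame record is still created
          mov     x29, sp
          bl      bar                     // non-leaf body: calls clobber x30
          ldp     x29, x30, [sp], #16
          ldr     x30, [x18, #-8]!        // reload the return address from the shadow stack
          ret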

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v9:
- Fixed grammar in the Kconfig help text.
- Changed Kconfig to allow SCS to be selected with the patchable-
function-entry graph tracer.
- Changed the EFI stub patch to not filter out -ffixed-x18, only
SCS flags.

Changes in v8:
- Added __noscs to __hyp_text instead of filtering SCS flags from
the entire arch/arm64/kvm/hyp directory.
- Added a patch to filter out -ffixed-x18 and SCS flags from the
EFI stub.

Changes in v7:
- Changed irq_stack_entry/exit to store the shadow stack pointer
in x24 instead of x20 as kernel_entry uses x20-x23 to store
data that can be used later. Updated the comment as well.
- Changed the Makefile in arch/arm64/kvm/hyp to also filter out
-ffixed-x18.
- Changed SHADOW_CALL_STACK to depend on !FUNCTION_GRAPH_TRACER
instead of not selecting HAVE_FUNCTION_GRAPH_TRACER with SCS.
- Removed ifdefs from the EFI wrapper and updated the comment to
explain why we are restoring x18.
- Rebased as Ard's x18 patches that were part of this series have
already been merged.

Changes in v6:
- Updated comment in the EFI RT wrapper to include the
explanation from the commit message.
- Fixed the SHADOW_CALL_STACK_VMAP config option and the
compilation errors in scs_init_irq()
- Updated the comment in entry.S to Mark's suggestion
- Fixed the WARN_ON in scs_init() to trip only when the return
value for cpuhp_setup_state() is < 0.
- Removed ifdefs from the code in arch/arm64/kernel/scs.c and
added separate shadow stacks for the SDEI handler

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved scs_set_magic before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Sami Tolvanen (12):
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
scs: disable when function graph tracing is enabled
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack
arm64: scs: add shadow stacks for SDEI
efi/libstub: disable SCS

Makefile | 6 +
arch/Kconfig | 35 ++++
arch/arm64/Kconfig | 5 +
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/kvm_hyp.h | 2 +-
arch/arm64/include/asm/scs.h | 39 ++++
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/efi-rt-wrapper.S | 11 +-
arch/arm64/kernel/entry.S | 46 ++++-
arch/arm64/kernel/head.S | 9 +
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 114 ++++++++++++
arch/arm64/kernel/sdei.c | 7 +
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/mm/proc.S | 14 ++
drivers/base/node.c | 6 +
drivers/firmware/efi/libstub/Makefile | 3 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 57 ++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 +
kernel/sched/core.c | 2 +
kernel/scs.c | 246 ++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
34 files changed, 662 insertions(+), 7 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: 63623fd44972d1ed2bfb6e0fb631dfcf547fd1e7
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:48:57

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 01/12] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 34 ++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 57 ++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
10 files changed, 314 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 0914049d2929..ea465905b399 100644
--- a/Makefile
+++ b/Makefile
@@ -845,6 +845,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 98de654b79b3..a67fa78c92e7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -526,6 +526,40 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found in
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable of reading and writing arbitrary memory
+ may be able to locate them and hijack control flow by modifying
+ shadow stacks that are not currently in use.
+
+config SHADOW_CALL_STACK_VMAP
+ bool "Use virtually mapped shadow call stacks"
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..c5572fd770b0
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index 9e5cbe5eab7b..cbd40460e903 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -184,6 +185,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb4130ced32..c332eb9d4841 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index 60a1295f4384..2bc73d654593 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -454,6 +455,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -837,6 +840,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -896,6 +901,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1a9983da4408..7473cd685560 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6036,6 +6037,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..28abed21950c
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize the risk of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline unsigned long *scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(void *s)
+{
+ *scs_magic(s) = SCS_END_MAGIC;
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+ void *s;
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ goto out;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+
+out:
+ if (s)
+ scs_set_magic(s);
+ /* TODO: poison for KASAN, unpoison in scs_free */
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_SCS; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup) < 0);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ scs_set_magic(s);
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static inline void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:48:59

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 02/12] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 98a31bafc8a2..874a8b428438 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 462f6873905a..0a6f395abc68 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 28abed21950c..5245e992c692 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -12,6 +12,7 @@
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -89,6 +90,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -135,6 +141,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -153,6 +164,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -162,6 +179,8 @@ int scs_prepare(struct task_struct *tsk, int node)
return -ENOMEM;

task_set_scs(tsk, s);
+ scs_account(tsk, 1);
+
return 0;
}

@@ -182,6 +201,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
task_set_scs(tsk, NULL);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3c4eb750a199..1381b9d84e4c 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5340,6 +5340,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5362,6 +5365,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 78d53378db99..d0650391c8c1 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:49:44

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 04/12] scs: disable when function graph tracing is enabled

With SCS the return address is taken from the shadow stack and the
value in the frame record has no effect. The mcount based graph tracer
hooks returns by modifying frame records on the (regular) stack, and
thus is not compatible. The patchable-function-entry graph tracer
used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved
to the shadow stack, and is compatible.

Modifying the mcount based graph tracer to work with SCS would require
a mechanism to determine the corresponding slot on the shadow stack
(and to pass this through the ftrace infrastructure), and we expect
that everyone will eventually move to the patchable-function-entry
based graph tracer anyway, so for now let's disable SCS when the
mcount-based graph tracer is enabled.

SCS and patchable-function-entry are both supported from LLVM 10.x.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index a67fa78c92e7..d53ade0950a5 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -535,6 +535,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK

config SHADOW_CALL_STACK
bool "Clang Shadow Call Stack"
+ depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
help
This option enables Clang's Shadow Call Stack, which uses a
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:50:28

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 03/12] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
also prints out the highest shadow stack usage per process.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 5245e992c692..ad74d13f2c0f 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
unsigned long *magic = scs_magic(__scs_base(tsk));
@@ -200,6 +238,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:50:40

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 05/12] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index dca1a97751ab..ab26b448faa9 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -65,6 +65,10 @@ stack_protector_prepare: prepare0
include/generated/asm-offsets.h))
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:54:40

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 06/12] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index aafed6902411..7d37e3c70ff5 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -56,6 +56,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
SYM_FUNC_START(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -80,6 +82,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
SYM_FUNC_END(cpu_do_suspend)

@@ -96,6 +103,13 @@ SYM_FUNC_START(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:54:44

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 07/12] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18, restore the register before jumping back
to potentially SCS instrumented code. This is safe, because the wrapper
is called with preemption disabled and a separate shadow stack is used
for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..6ca6c0dc11a1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:54:45

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 08/12] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:54:56

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 09/12] arm64: disable SCS for hypervisor code

Disable SCS for code that runs at a different exception level by
adding __noscs to __hyp_text.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Acked-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_hyp.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index a3a6a2ba9a63..0f0603f55ea0 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -13,7 +13,7 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>

-#define __hyp_text __section(.hyp.text) notrace
+#define __hyp_text __section(.hyp.text) notrace __noscs

#define read_sysreg_elx(r,nvh,vh) \
({ \
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:55:15

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 11/12] arm64: scs: add shadow stacks for SDEI

This change adds per-CPU shadow call stacks for the SDEI handler.
Similarly to how the kernel stacks are handled, we add separate shadow
stacks for normal and critical events.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: James Morse <[email protected]>
Tested-by: James Morse <[email protected]>
---
arch/arm64/include/asm/scs.h | 2 +
arch/arm64/kernel/entry.S | 14 ++++-
arch/arm64/kernel/scs.c | 106 +++++++++++++++++++++++++++++------
arch/arm64/kernel/sdei.c | 7 +++
4 files changed, 112 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index c50d2b0c6c5f..8e327e14bc15 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,6 +9,7 @@
#ifdef CONFIG_SHADOW_CALL_STACK

extern void scs_init_irq(void);
+extern int scs_init_sdei(void);

static __always_inline void scs_save(struct task_struct *tsk)
{
@@ -27,6 +28,7 @@ static inline void scs_overflow_check(struct task_struct *tsk)
#else /* CONFIG_SHADOW_CALL_STACK */

static inline void scs_init_irq(void) {}
+static inline int scs_init_sdei(void) { return 0; }
static inline void scs_save(struct task_struct *tsk) {}
static inline void scs_overflow_check(struct task_struct *tsk) {}

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 4b18c3bbdea5..2e2ce1b9ebf5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1050,13 +1050,16 @@ ENTRY(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1066,6 +1069,15 @@ ENTRY(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
+ b 4f
+3: ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index eaadf5430baa..dddb7c56518b 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -10,31 +10,105 @@
#include <asm/pgtable.h>
#include <asm/scs.h>

-DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#define DECLARE_SCS(name) \
+ DECLARE_PER_CPU(unsigned long *, name ## _ptr); \
+ DECLARE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

-#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
-DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
- __aligned(SCS_SIZE);
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr)
+#else
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr); \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+ __aligned(SCS_SIZE)
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+DECLARE_SCS(irq_shadow_call_stack);
+DECLARE_SCS(sdei_shadow_call_stack_normal);
+DECLARE_SCS(sdei_shadow_call_stack_critical);
+
+DEFINE_SCS(irq_shadow_call_stack);
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
#endif

+static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ if (!p)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = p;
+
+ return 0;
+}
+
+static void scs_free_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p = per_cpu(*ptr, cpu);
+
+ if (p) {
+ per_cpu(*ptr, cpu) = NULL;
+ vfree(p);
+ }
+}
+
+static void scs_free_sdei(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ scs_free_percpu(&sdei_shadow_call_stack_normal_ptr, cpu);
+ scs_free_percpu(&sdei_shadow_call_stack_critical_ptr, cpu);
+ }
+}
+
void scs_init_irq(void)
{
int cpu;

for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
- unsigned long *p;
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP))
+ WARN_ON(scs_alloc_percpu(&irq_shadow_call_stack_ptr,
+ cpu));
+ else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+ }
+}

- p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
- VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL,
- 0, cpu_to_node(cpu),
- __builtin_return_address(0));
+int scs_init_sdei(void)
+{
+ int cpu;

- per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
-#else
- per_cpu(irq_shadow_call_stack_ptr, cpu) =
- per_cpu(irq_shadow_call_stack, cpu);
-#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP)) {
+ if (scs_alloc_percpu(
+ &sdei_shadow_call_stack_normal_ptr, cpu) ||
+ scs_alloc_percpu(
+ &sdei_shadow_call_stack_critical_ptr, cpu)) {
+ scs_free_sdei();
+ return -ENOMEM;
+ }
+ } else {
+ per_cpu(sdei_shadow_call_stack_normal_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_normal, cpu);
+ per_cpu(sdei_shadow_call_stack_critical_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_critical, cpu);
+ }
}
+
+ return 0;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d6259dac62b6..2854b9f7760a 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -13,6 +13,7 @@
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
@@ -162,6 +163,12 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}

+ if (scs_init_sdei()) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ free_sdei_stacks();
+ return 0;
+ }
+
sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:55:51

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 10/12] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 37 +++++++++++++++++++++++++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 32 ++++++++++++++++++++--
arch/arm64/kernel/head.S | 9 +++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 40 ++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
11 files changed, 136 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0b30e884e088..eae76686be77 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -65,6 +65,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -1022,6 +1023,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..c50d2b0c6c5f
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void) {}
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY __ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index f0cec4160136..8c73764b9ed2 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index fc6488660f64..08fafc4da2cf 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index a5bdce8af65b..d485dc5cd196 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 9461d812ae27..4b18c3bbdea5 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -177,6 +177,10 @@ alternative_cb_end

apply_ssbd 1, x22, x23

+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -278,6 +282,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -383,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -400,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x18, x24
+#endif
.endm

/* GPRs used by entry code */
@@ -895,6 +918,11 @@ ENTRY(cpu_switch_to)
ldr lr, [x8]
mov sp, x9
msr sp_el0, x1
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
ENDPROC(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 989b1944cb71..ca561de903d4 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -424,6 +425,10 @@ __primary_switched:
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -731,6 +736,10 @@ __secondary_switched:
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 00626057a384..9151616c354c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -514,6 +515,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
uao_thread_switch(next);
ptrauth_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..eaadf5430baa
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index d4ed9a19d8fe..f2cb344f998c 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -46,6 +46,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -358,6 +359,9 @@ void cpu_die(void)
{
unsigned int cpu = smp_processor_id();

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.25.0.265.gbab2e86ba0-goog

2020-02-25 17:56:45

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v9 12/12] efi/libstub: disable SCS

Shadow stacks are not available in the EFI stub, so filter out SCS flags.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
drivers/firmware/efi/libstub/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 98a81576213d..ee5c37c401c9 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -30,6 +30,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
$(call cc-option,-fno-stack-protector) \
-D__DISABLE_EXPORTS

+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.25.0.265.gbab2e86ba0-goog

2020-02-28 16:32:21

by James Morse

[permalink] [raw]
Subject: Re: [PATCH v9 10/12] arm64: implement Shadow Call Stack

Hi Sami,

On 25/02/2020 17:39, Sami Tolvanen wrote:
> This change implements shadow stack switching, initial SCS set-up,
> and interrupt shadow stacks for arm64.

> diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
> new file mode 100644
> index 000000000000..c50d2b0c6c5f
> --- /dev/null
> +++ b/arch/arm64/include/asm/scs.h
> @@ -0,0 +1,37 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _ASM_SCS_H
> +#define _ASM_SCS_H
> +
> +#ifndef __ASSEMBLY__

As the whole file is guarded by this, why do you need to include it in assembly files at all?


> +
> +#include <linux/scs.h>
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +
> +extern void scs_init_irq(void);
> +
> +static __always_inline void scs_save(struct task_struct *tsk)
> +{
> + void *s;
> +
> + asm volatile("mov %0, x18" : "=r" (s));
> + task_set_scs(tsk, s);
> +}
> +
> +static inline void scs_overflow_check(struct task_struct *tsk)
> +{
> + if (unlikely(scs_corrupted(tsk)))
> + panic("corrupted shadow stack detected inside scheduler\n");

Could this ever catch anything with CONFIG_SHADOW_CALL_STACK_VMAP?
Wouldn't we have hit the vmalloc guard page at the point of overflow?


> +}
> +
> +#else /* CONFIG_SHADOW_CALL_STACK */
> +
> +static inline void scs_init_irq(void) {}
> +static inline void scs_save(struct task_struct *tsk) {}
> +static inline void scs_overflow_check(struct task_struct *tsk) {}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK */
> +
> +#endif /* __ASSEMBLY __ */
> +
> +#endif /* _ASM_SCS_H */



> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index 9461d812ae27..4b18c3bbdea5 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S

If I corrupt x18 so that we take an exception (mov x18, xzr), we take that exception
whenever we run C code. The CPU 'vanishes' and I get a very upset scheduler shortly after.

Stack misalignment has the same problem, but the overflow test (eventually) catches that,
then calls panic() using the overflow stack. (See the kernel_ventry macro and __bad_stack
in entry.S)

It would be nice to have a per-cpu stack that we switch to when on the overflow stack.
(this would catch the scs overflow hitting the guard page too, as we should eat through
the regular stack until we overflowed it!)


> diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
> index d4ed9a19d8fe..f2cb344f998c 100644
> --- a/arch/arm64/kernel/smp.c
> +++ b/arch/arm64/kernel/smp.c
> @@ -358,6 +359,9 @@ void cpu_die(void)
> {
> unsigned int cpu = smp_processor_id();
>
> + /* Save the shadow stack pointer before exiting the idle task */

I can't work out why this needs to be before idle_task_exit()...
It needs to run before init_idle(), which calls scs_task_reset(), but all that is on the
cpu_up() path. (if it is to pair those up, any reason core code can't do both?)


> + scs_save(current);
> +
> idle_task_exit();
>
> local_daif_mask();
>


Reviewed-by: James Morse <[email protected]>


Thanks!

James

2020-02-28 20:52:52

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v9 10/12] arm64: implement Shadow Call Stack

On Fri, Feb 28, 2020 at 8:31 AM James Morse <[email protected]> wrote:
> > +#ifndef __ASSEMBLY__
>
> As the whole file is guarded by this, why do you need to include it in assembly files at all?

True, the include in head.S is not needed. I'll remove it in the next version.

> > +static inline void scs_overflow_check(struct task_struct *tsk)
> > +{
> > + if (unlikely(scs_corrupted(tsk)))
> > + panic("corrupted shadow stack detected inside scheduler\n");
>
> Could this ever catch anything with CONFIG_SHADOW_CALL_STACK_VMAP?
> Wouldn't we have hit the vmalloc guard page at the point of overflow?

With CONFIG_SHADOW_CALL_STACK_VMAP, even though we allocate a full
page, SCS_SIZE is still 1k, so we should catch overflows here well
before we hit the guard page.
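
For reference, a minimal sketch of how that check works (names follow the
generic SCS code earlier in this series; the exact definitions live in
include/linux/scs.h and kernel/scs.c):

/*
 * Sketch only: the last slot of the SCS_SIZE buffer is set to
 * SCS_END_MAGIC (a non-canonical value) when the stack is allocated.
 * An overflow of the 1k shadow stack clobbers that slot long before
 * the vmalloc guard page of the full page is reached, so the check
 * called from scs_overflow_check() fires first.
 */
static inline unsigned long *scs_magic(void *s)
{
	return (unsigned long *)(s + SCS_SIZE) - 1;
}

static inline bool scs_corrupted(struct task_struct *tsk)
{
	return READ_ONCE_NOCHECK(*scs_magic(__scs_base(tsk))) !=
		SCS_END_MAGIC;
}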

> It would be nice to have a per-cpu stack that we switch to when on the overflow stack.

It shouldn't be a problem to add an overflow shadow stack if you think
one is needed.

> I can't work out why this needs to be before idle_task_exit()...
> It needs to run before init_idle(), which calls scs_task_reset(), but all that is on the
> cpu_up() path. (if it is to pair those up, any reason core code can't do both?)

At this point, the idle task's shadow stack pointer is only stored in
x18, so we need to save it again to thread_info before the CPU shuts
down, or we'll lose the pointer.

Sami

2020-04-06 16:42:29

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 00/12] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by inactive tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is currently incompatible
with features that rely on modifying function return addresses
in the kernel stack to alter control flow. A copy of the return
address is still kept in the kernel stack for compatibility with
stack unwinding, for example.
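
To make the mechanism concrete, here is a rough sketch of what the
instrumentation amounts to on arm64 (illustrative only; the instructions
are emitted by the compiler, and the function names are made up):

extern void do_something(void);	/* placeholder callee */

/* Any non-leaf C function, i.e. one that makes further calls: */
void example(void)
{
	/* prologue, roughly:   str  x30, [x18], #8     // push LR to the shadow stack */
	do_something();
	/* epilogue, roughly:   ldr  x30, [x18, #-8]!   // reload LR before returning  */
}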

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v10:
- Removed an unnecessary <asm/scs.h> include from head.S.

Changes in v9:
- Fixed grammar in the Kconfig help text.
- Changed Kconfig to allow SCS to be selected with the patchable-
function-entry graph tracer.
- Changed the EFI stub patch to not filter out -ffixed-x18, only
SCS flags.

Changes in v8:
- Added __noscs to __hyp_text instead of filtering SCS flags from
the entire arch/arm64/kvm/hyp directory.
- Added a patch to filter out -ffixed-x18 and SCS flags from the
EFI stub.

Changes in v7:
- Changed irq_stack_entry/exit to store the shadow stack pointer
in x24 instead of x20 as kernel_entry uses x20-x23 to store
data that can be used later. Updated the comment as well.
- Changed the Makefile in arch/arm64/kvm/hyp to also filter out
-ffixed-x18.
- Changed SHADOW_CALL_STACK to depend on !FUNCTION_GRAPH_TRACER
instead of not selecting HAVE_FUNCTION_GRAPH_TRACER with SCS.
- Removed ifdefs from the EFI wrapper and updated the comment to
explain why we are restoring x18.
- Rebased as Ard's x18 patches that were part of this series have
already been merged.

Changes in v6:
- Updated comment in the EFI RT wrapper to include the
explanation from the commit message.
- Fixed the SHADOW_CALL_STACK_VMAP config option and the
compilation errors in scs_init_irq()
- Updated the comment in entry.S to Mark's suggestion
- Fixed the WARN_ON in scs_init() to trip only when the return
value for cpuhp_setup_state() is < 0.
- Removed ifdefs from the code in arch/arm64/kernel/scs.c and
added separate shadow stacks for the SDEI handler

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved scs_set_magic() before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Sami Tolvanen (12):
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
scs: disable when function graph tracing is enabled
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack
arm64: scs: add shadow stacks for SDEI
efi/libstub: disable SCS

Makefile | 6 +
arch/Kconfig | 35 ++++
arch/arm64/Kconfig | 5 +
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/kvm_hyp.h | 2 +-
arch/arm64/include/asm/scs.h | 39 ++++
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/efi-rt-wrapper.S | 11 +-
arch/arm64/kernel/entry.S | 47 ++++-
arch/arm64/kernel/head.S | 8 +
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 114 ++++++++++++
arch/arm64/kernel/sdei.c | 7 +
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/mm/proc.S | 14 ++
drivers/base/node.c | 6 +
drivers/firmware/efi/libstub/Makefile | 3 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 57 ++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 +
kernel/sched/core.c | 2 +
kernel/scs.c | 246 ++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
34 files changed, 662 insertions(+), 7 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: a10c9c710f9ecea87b9f4bbb837467893b4bef01
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:42:37

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 05/12] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.
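
To illustrate what reserving the register means at the compiler level
(hypothetical snippet, not part of the patch):

/*
 * Hypothetical illustration: with -ffixed-x18 the compiler never uses
 * x18 as a scratch register or for spills, so it can be bound to a
 * global register variable and keeps its value across calls into other
 * code built with the same flag. This is what lets SCS keep the shadow
 * stack pointer live in x18.
 */
register unsigned long *shadow_call_stack_ptr asm("x18");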

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index f15f92ba53e6..34277c60cdf9 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -76,6 +76,10 @@ branch-prot-flags-$(CONFIG_AS_HAS_PAC) += -Wa,-march=armv8.3-a
KBUILD_CFLAGS += $(branch-prot-flags-y)
endif

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:42:48

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 07/12] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18, restore the register before jumping back
to potentially SCS instrumented code. This is safe, because the wrapper
is called with preemption disabled and a separate shadow stack is used
for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..6ca6c0dc11a1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:43:13

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 10/12] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 37 +++++++++++++++++++++++++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 33 +++++++++++++++++++++--
arch/arm64/kernel/head.S | 8 ++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 40 ++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
11 files changed, 136 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 6e41c4b62607..b47c254ce1dd 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -64,6 +64,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -1025,6 +1026,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..c50d2b0c6c5f
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void) {}
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY __ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 512174a8e789..1fb651f73da3 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e5b8ee31442..151f28521f1e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9981a0a5a87f..777a662888ec 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ddcde093c433..c33264ce7258 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -179,6 +179,11 @@ alternative_cb_end
apply_ssbd 1, x22, x23

ptrauth_keys_install_kernel tsk, 1, x20, x22, x23
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -280,6 +285,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -388,6 +399,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -405,15 +419,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x18, x24
+#endif
.endm

/* GPRs used by entry code */
@@ -901,6 +925,11 @@ SYM_FUNC_START(cpu_switch_to)
mov sp, x9
msr sp_el0, x1
ptrauth_keys_install_kernel x1, 1, x8, x9, x10
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 57a91032b4c2..1514445bbccb 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -424,6 +424,10 @@ SYM_FUNC_START_LOCAL(__primary_switched)
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -737,6 +741,10 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 56be4cbf771f..a35d3318492c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -515,6 +516,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
entry_task_switch(next);
uao_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..eaadf5430baa
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 061f60fe452f..1d112e34a636 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -46,6 +46,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -370,6 +371,9 @@ void cpu_die(void)
unsigned int cpu = smp_processor_id();
const struct cpu_operations *ops = get_cpu_ops(cpu);

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:43:16

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 11/12] arm64: scs: add shadow stacks for SDEI

This change adds per-CPU shadow call stacks for the SDEI handler.
Similarly to how the kernel stacks are handled, we add separate shadow
stacks for normal and critical events.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: James Morse <[email protected]>
Tested-by: James Morse <[email protected]>
---
arch/arm64/include/asm/scs.h | 2 +
arch/arm64/kernel/entry.S | 14 ++++-
arch/arm64/kernel/scs.c | 106 +++++++++++++++++++++++++++++------
arch/arm64/kernel/sdei.c | 7 +++
4 files changed, 112 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index c50d2b0c6c5f..8e327e14bc15 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,6 +9,7 @@
#ifdef CONFIG_SHADOW_CALL_STACK

extern void scs_init_irq(void);
+extern int scs_init_sdei(void);

static __always_inline void scs_save(struct task_struct *tsk)
{
@@ -27,6 +28,7 @@ static inline void scs_overflow_check(struct task_struct *tsk)
#else /* CONFIG_SHADOW_CALL_STACK */

static inline void scs_init_irq(void) {}
+static inline int scs_init_sdei(void) { return 0; }
static inline void scs_save(struct task_struct *tsk) {}
static inline void scs_overflow_check(struct task_struct *tsk) {}

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c33264ce7258..768cd7abd32c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1058,13 +1058,16 @@ SYM_CODE_START(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1074,6 +1077,15 @@ SYM_CODE_START(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
+ b 4f
+3: ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index eaadf5430baa..dddb7c56518b 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -10,31 +10,105 @@
#include <asm/pgtable.h>
#include <asm/scs.h>

-DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#define DECLARE_SCS(name) \
+ DECLARE_PER_CPU(unsigned long *, name ## _ptr); \
+ DECLARE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

-#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
-DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
- __aligned(SCS_SIZE);
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr)
+#else
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr); \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+ __aligned(SCS_SIZE)
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+DECLARE_SCS(irq_shadow_call_stack);
+DECLARE_SCS(sdei_shadow_call_stack_normal);
+DECLARE_SCS(sdei_shadow_call_stack_critical);
+
+DEFINE_SCS(irq_shadow_call_stack);
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
#endif

+static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ if (!p)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = p;
+
+ return 0;
+}
+
+static void scs_free_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p = per_cpu(*ptr, cpu);
+
+ if (p) {
+ per_cpu(*ptr, cpu) = NULL;
+ vfree(p);
+ }
+}
+
+static void scs_free_sdei(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ scs_free_percpu(&sdei_shadow_call_stack_normal_ptr, cpu);
+ scs_free_percpu(&sdei_shadow_call_stack_critical_ptr, cpu);
+ }
+}
+
void scs_init_irq(void)
{
int cpu;

for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
- unsigned long *p;
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP))
+ WARN_ON(scs_alloc_percpu(&irq_shadow_call_stack_ptr,
+ cpu));
+ else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+ }
+}

- p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
- VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL,
- 0, cpu_to_node(cpu),
- __builtin_return_address(0));
+int scs_init_sdei(void)
+{
+ int cpu;

- per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
-#else
- per_cpu(irq_shadow_call_stack_ptr, cpu) =
- per_cpu(irq_shadow_call_stack, cpu);
-#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP)) {
+ if (scs_alloc_percpu(
+ &sdei_shadow_call_stack_normal_ptr, cpu) ||
+ scs_alloc_percpu(
+ &sdei_shadow_call_stack_critical_ptr, cpu)) {
+ scs_free_sdei();
+ return -ENOMEM;
+ }
+ } else {
+ per_cpu(sdei_shadow_call_stack_normal_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_normal, cpu);
+ per_cpu(sdei_shadow_call_stack_critical_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_critical, cpu);
+ }
}
+
+ return 0;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d6259dac62b6..2854b9f7760a 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -13,6 +13,7 @@
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
@@ -162,6 +163,12 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}

+ if (scs_init_sdei()) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ free_sdei_stacks();
+ return 0;
+ }
+
sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:43:19

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 12/12] efi/libstub: disable SCS

Shadow stacks are not available in the EFI stub, so filter out SCS flags.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
---
drivers/firmware/efi/libstub/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 094eabdecfe6..fa0bb64f93d6 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
$(call cc-option,-fno-stack-protector) \
-D__DISABLE_EXPORTS

+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:43:23

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 02/12] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 10d7e818e118..502ab5447c8d 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index e84d448988b6..a6c60e6efa68 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -200,6 +200,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 28abed21950c..5245e992c692 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -12,6 +12,7 @@
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -89,6 +90,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -135,6 +141,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -153,6 +164,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -162,6 +179,8 @@ int scs_prepare(struct task_struct *tsk, int node)
return -ENOMEM;

task_set_scs(tsk, s);
+ scs_account(tsk, 1);
+
return 0;
}

@@ -182,6 +201,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
task_set_scs(tsk, NULL);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index e5f76da8cd4e..79f07ccac63e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5338,6 +5338,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5360,6 +5363,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index c9c0d71f917f..287a95987b7b 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:43:54

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 08/12] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:43:58

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 09/12] arm64: disable SCS for hypervisor code

Disable SCS for code that runs at a different exception level by
adding __noscs to __hyp_text.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Acked-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_hyp.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index fe57f60f06a8..875b106c5d98 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -13,7 +13,7 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>

-#define __hyp_text __section(.hyp.text) notrace
+#define __hyp_text __section(.hyp.text) notrace __noscs

#define read_sysreg_elx(r,nvh,vh) \
({ \
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 16:43:59

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 06/12] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 197a9ba2d5ea..ed15be0f8103 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -58,6 +58,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
SYM_FUNC_START(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -82,6 +84,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
SYM_FUNC_END(cpu_do_suspend)

@@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.26.0.292.g33ef6b2f38-goog
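For reference, the layout that the NR_CTX_REGS bump extends can be sketched
in C as follows; this is only a sketch derived from the hunks above, not the
kernel's verbatim definition. The new slot is ctx_regs[12], i.e. byte offset
12 * 8 = 96, which is exactly the #96 offset used by the str/ldr pair added
to proc.S:

    #include <stddef.h>

    #define NR_CTX_REGS 13                  /* was 12 before this patch */

    struct cpu_suspend_ctx {
            unsigned long ctx_regs[NR_CTX_REGS]; /* ctx_regs[12] now holds x18 */
            unsigned long sp;
    } __attribute__((aligned(16)));

    /* The x18 slot sits at byte offset 12 * sizeof(long) == 96. */
    _Static_assert(offsetof(struct cpu_suspend_ctx, ctx_regs[12]) == 96,
                   "x18 slot must match the #96 offset in cpu_do_suspend/resume");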

2020-04-06 16:44:01

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 04/12] scs: disable when function graph tracing is enabled

The mcount-based graph tracer hooks returns by modifying frame records
on the (regular) stack, but with SCS the return address is taken from
the shadow stack and the value in the frame record has no effect, so
the two are not compatible. The patchable-function-entry graph tracer
used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved
to the shadow stack, and is compatible.

Modifying the mcount-based graph tracer to work with SCS would require
a mechanism to determine the corresponding slot on the shadow stack
(and to pass this through the ftrace infrastructure). As we expect
everyone to eventually move to the patchable-function-entry based
graph tracer anyway, for now let's disable SCS when the mcount-based
graph tracer is enabled.

SCS and patchable-function-entry are both supported from LLVM 10.x.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 691a552c2cc3..c53cb9025ad2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -542,6 +542,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK

config SHADOW_CALL_STACK
bool "Clang Shadow Call Stack"
+ depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
help
This option enables Clang's Shadow Call Stack, which uses a
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 17:06:23

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 01/12] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 34 ++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 57 ++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
10 files changed, 314 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index c91342953d9e..cb2ed7443d57 100644
--- a/Makefile
+++ b/Makefile
@@ -851,6 +851,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 786a85d4ad40..691a552c2cc3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -533,6 +533,40 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found in
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable of reading and writing arbitrary memory
+ may be able to locate them and hijack control flow by modifying
+ shadow stacks that are not currently in use.
+
+config SHADOW_CALL_STACK_VMAP
+ bool "Use virtually mapped shadow call stacks"
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index 72393a8c1a6c..be5d5be4b1ae 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -202,6 +202,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..c5572fd770b0
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index bd403ed3e418..aaa71366d162 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -185,6 +186,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb4130ced32..c332eb9d4841 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index d2a967bf85d5..3f54070a7a53 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -455,6 +456,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -838,6 +841,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -897,6 +902,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a2694ba82874..9bb593f7974f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6050,6 +6051,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..28abed21950c
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize the risk of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline unsigned long *scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(void *s)
+{
+ *scs_magic(s) = SCS_END_MAGIC;
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+ void *s;
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ goto out;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+
+out:
+ if (s)
+ scs_set_magic(s);
+ /* TODO: poison for KASAN, unpoison in scs_free */
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_SCS; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup) < 0);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ scs_set_magic(s);
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static inline void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.26.0.292.g33ef6b2f38-goog
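One detail in __scs_base() above is worth spelling out: because each shadow
stack is SCS_SIZE-aligned and grows upwards, masking off the low bits of the
saved (and possibly advanced) shadow stack pointer recovers the base of the
allocation. A worked example, using the SCS_SIZE of 1024 defined above and a
purely hypothetical pointer value:

    /* hypothetical in-use shadow stack pointer saved from x18 */
    unsigned long scs_ptr  = 0xffff0000c0000238UL;
    /* masking with ~(SCS_SIZE - 1) == ~0x3ffUL yields the base */
    unsigned long scs_base = scs_ptr & ~(1024UL - 1);  /* 0xffff0000c0000000 */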

2020-04-06 17:06:47

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v10 03/12] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
also prints out the highest shadow stack usage per process.
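With CONFIG_DEBUG_STACK_USAGE enabled, the pr_info() below produces log lines
of the following form as new high-water marks are reached (the process name,
PID, and byte count here are made up):

    kworker/u16:3 (1245): highest shadow stack usage: 552 bytes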

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 5245e992c692..ad74d13f2c0f 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
unsigned long *magic = scs_magic(__scs_base(tsk));
@@ -200,6 +238,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.26.0.292.g33ef6b2f38-goog

2020-04-06 18:24:35

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v10 09/12] arm64: disable SCS for hypervisor code

On Mon, Apr 06, 2020 at 09:41:18AM -0700, Sami Tolvanen wrote:
> Disable SCS for code that runs at a different exception level by
> adding __noscs to __hyp_text.
>
> Suggested-by: James Morse <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>

Reviewed-by: Kees Cook <[email protected]>

-Kees

> Acked-by: Marc Zyngier <[email protected]>
> ---
> arch/arm64/include/asm/kvm_hyp.h | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
> index fe57f60f06a8..875b106c5d98 100644
> --- a/arch/arm64/include/asm/kvm_hyp.h
> +++ b/arch/arm64/include/asm/kvm_hyp.h
> @@ -13,7 +13,7 @@
> #include <asm/kvm_mmu.h>
> #include <asm/sysreg.h>
>
> -#define __hyp_text __section(.hyp.text) notrace
> +#define __hyp_text __section(.hyp.text) notrace __noscs
>
> #define read_sysreg_elx(r,nvh,vh) \
> ({ \
> --
> 2.26.0.292.g33ef6b2f38-goog
>

--
Kees Cook

2020-04-06 18:27:54

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v10 12/12] efi/libstub: disable SCS

On Mon, Apr 06, 2020 at 09:41:21AM -0700, Sami Tolvanen wrote:
> Shadow stacks are not available in the EFI stub, filter out SCS flags.
>
> Suggested-by: James Morse <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>

Reviewed-by: Kees Cook <[email protected]>

> ---
> drivers/firmware/efi/libstub/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 094eabdecfe6..fa0bb64f93d6 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
> $(call cc-option,-fno-stack-protector) \
> -D__DISABLE_EXPORTS
>
> +# remove SCS flags from all objects in this directory

nit: double space

-Kees

> +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
> +
> GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> --
> 2.26.0.292.g33ef6b2f38-goog
>

--
Kees Cook

2020-04-07 12:01:41

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v10 12/12] efi/libstub: disable SCS

On Mon, 6 Apr 2020 at 18:42, Sami Tolvanen <[email protected]> wrote:
>
> Shadow stacks are not available in the EFI stub, filter out SCS flags.
>
> Suggested-by: James Morse <[email protected]>
> Signed-off-by: Sami Tolvanen <[email protected]>

Acked-by: Ard Biesheuvel <[email protected]>

> ---
> drivers/firmware/efi/libstub/Makefile | 3 +++
> 1 file changed, 3 insertions(+)
>
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 094eabdecfe6..fa0bb64f93d6 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
> $(call cc-option,-fno-stack-protector) \
> -D__DISABLE_EXPORTS
>
> +# remove SCS flags from all objects in this directory
> +KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
> +
> GCOV_PROFILE := n
> KASAN_SANITIZE := n
> UBSAN_SANITIZE := n
> --
> 2.26.0.292.g33ef6b2f38-goog
>

2020-04-16 16:15:01

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 00/12] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by inactive tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is incompatible with features
that rely on modifying function return addresses in the kernel
stack to alter control flow. A copy of the return address is
still kept in the kernel stack for compatibility with stack
unwinding, for example.
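As a rough illustration of what this amounts to for a single function (a
sketch only: the instrumentation is emitted by the compiler backend as arm64
instructions using x18 as the shadow stack pointer, so nothing like this
appears in the C source; the function names below are hypothetical):

    void do_work(void);                 /* hypothetical callee */

    void instrumented_example(void)     /* hypothetical non-leaf function */
    {
            /*
             * Prologue: clang additionally emits
             *     str  x30, [x18], #8
             * pushing the return address onto the shadow stack.
             */
            do_work();
            /*
             * Epilogue: clang emits
             *     ldr  x30, [x18, #-8]!
             *     ret
             * reloading the return address from the shadow stack, so a
             * corrupted frame record on the regular stack cannot redirect
             * the return.
             */
    }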

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v11:
- Rebased, added maintainers for kernel/ changes.

Changes in v10:
- Removed an unnecessary <asm/scs.h> include from head.S.

Changes in v9:
- Fixed grammar in the Kconfig help text.
- Changed Kconfig to allow SCS to be selected with the patchable-
function-entry graph tracer.
- Changed the EFI stub patch to not filter out -ffixed-x18, only
SCS flags.

Changes in v8:
- Added __noscs to __hyp_text instead of filtering SCS flags from
the entire arch/arm64/kvm/hyp directory.
- Added a patch to filter out -ffixed-x18 and SCS flags from the
EFI stub.

Changes in v7:
- Changed irq_stack_entry/exit to store the shadow stack pointer
in x24 instead of x20 as kernel_entry uses x20-x23 to store
data that can be used later. Updated the comment as well.
- Changed the Makefile in arch/arm64/kvm/hyp to also filter out
-ffixed-x18.
- Changed SHADOW_CALL_STACK to depend on !FUNCTION_GRAPH_TRACER
instead of not selecting HAVE_FUNCTION_GRAPH_TRACER with SCS.
- Removed ifdefs from the EFI wrapper and updated the comment to
explain why we are restoring x18.
- Rebased as Ard's x18 patches that were part of this series have
already been merged.

Changes in v6:
- Updated comment in the EFI RT wrapper to include the
explanation from the commit message.
- Fixed the SHADOW_CALL_STACK_VMAP config option and the
compilation errors in scs_init_irq()
- Updated the comment in entry.S to Mark's suggestion
- Fixed the WARN_ON in scs_init() to trip only when the return
value for cpuhp_setup_state() is < 0.
- Removed ifdefs from the code in arch/arm64/kernel/scs.c and
added separate shadow stacks for the SDEI handler

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved scs_set_magic() before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions


Sami Tolvanen (12):
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
scs: disable when function graph tracing is enabled
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack
arm64: scs: add shadow stacks for SDEI
efi/libstub: disable SCS

Makefile | 6 +
arch/Kconfig | 35 ++++
arch/arm64/Kconfig | 5 +
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/kvm_hyp.h | 2 +-
arch/arm64/include/asm/scs.h | 39 ++++
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/efi-rt-wrapper.S | 11 +-
arch/arm64/kernel/entry.S | 47 ++++-
arch/arm64/kernel/head.S | 8 +
arch/arm64/kernel/irq.c | 2 +
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 114 ++++++++++++
arch/arm64/kernel/sdei.c | 7 +
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/mm/proc.S | 14 ++
drivers/base/node.c | 6 +
drivers/firmware/efi/libstub/Makefile | 3 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 +
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 57 ++++++
init/init_task.c | 8 +
kernel/Makefile | 1 +
kernel/fork.c | 9 +
kernel/sched/core.c | 2 +
kernel/scs.c | 246 ++++++++++++++++++++++++++
mm/page_alloc.c | 6 +
mm/vmstat.c | 3 +
34 files changed, 662 insertions(+), 7 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: 00086336a8d96a04aa960f912287692a258f6cf5
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 16:15:02

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 03/12] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
also prints out the highest shadow stack usage per process.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 5245e992c692..ad74d13f2c0f 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static inline unsigned long scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static DEFINE_SPINLOCK(lock);
+ static unsigned long highest;
+ unsigned long used = scs_used(tsk);
+
+ if (used <= highest)
+ return;
+
+ spin_lock(&lock);
+
+ if (used > highest) {
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ highest = used;
+ }
+
+ spin_unlock(&lock);
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk)
+{
+}
+#endif
+
bool scs_corrupted(struct task_struct *tsk)
{
unsigned long *magic = scs_magic(__scs_base(tsk));
@@ -200,6 +238,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
task_set_scs(tsk, NULL);
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 16:15:33

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 07/12] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18, restore the register before jumping back
to potentially SCS instrumented code. This is safe, because the wrapper
is called with preemption disabled and a separate shadow stack is used
for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..6ca6c0dc11a1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 16:15:48

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 11/12] arm64: scs: add shadow stacks for SDEI

This change adds per-CPU shadow call stacks for the SDEI handler.
Similarly to how the kernel stacks are handled, we add separate shadow
stacks for normal and critical events.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: James Morse <[email protected]>
Tested-by: James Morse <[email protected]>
---
arch/arm64/include/asm/scs.h | 2 +
arch/arm64/kernel/entry.S | 14 ++++-
arch/arm64/kernel/scs.c | 106 +++++++++++++++++++++++++++++------
arch/arm64/kernel/sdei.c | 7 +++
4 files changed, 112 insertions(+), 17 deletions(-)

diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
index c50d2b0c6c5f..8e327e14bc15 100644
--- a/arch/arm64/include/asm/scs.h
+++ b/arch/arm64/include/asm/scs.h
@@ -9,6 +9,7 @@
#ifdef CONFIG_SHADOW_CALL_STACK

extern void scs_init_irq(void);
+extern int scs_init_sdei(void);

static __always_inline void scs_save(struct task_struct *tsk)
{
@@ -27,6 +28,7 @@ static inline void scs_overflow_check(struct task_struct *tsk)
#else /* CONFIG_SHADOW_CALL_STACK */

static inline void scs_init_irq(void) {}
+static inline int scs_init_sdei(void) { return 0; }
static inline void scs_save(struct task_struct *tsk) {}
static inline void scs_overflow_check(struct task_struct *tsk) {}

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index c33264ce7258..768cd7abd32c 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1058,13 +1058,16 @@ SYM_CODE_START(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1074,6 +1077,15 @@ SYM_CODE_START(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal_ptr, tmp=x6
+ b 4f
+3: ldr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical_ptr, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index eaadf5430baa..dddb7c56518b 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -10,31 +10,105 @@
#include <asm/pgtable.h>
#include <asm/scs.h>

-DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+#define DECLARE_SCS(name) \
+ DECLARE_PER_CPU(unsigned long *, name ## _ptr); \
+ DECLARE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name)

-#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
-DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
- __aligned(SCS_SIZE);
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr)
+#else
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long *, name ## _ptr); \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+ __aligned(SCS_SIZE)
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+DECLARE_SCS(irq_shadow_call_stack);
+DECLARE_SCS(sdei_shadow_call_stack_normal);
+DECLARE_SCS(sdei_shadow_call_stack_critical);
+
+DEFINE_SCS(irq_shadow_call_stack);
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
#endif

+static int scs_alloc_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ if (!p)
+ return -ENOMEM;
+ per_cpu(*ptr, cpu) = p;
+
+ return 0;
+}
+
+static void scs_free_percpu(unsigned long * __percpu *ptr, int cpu)
+{
+ unsigned long *p = per_cpu(*ptr, cpu);
+
+ if (p) {
+ per_cpu(*ptr, cpu) = NULL;
+ vfree(p);
+ }
+}
+
+static void scs_free_sdei(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ scs_free_percpu(&sdei_shadow_call_stack_normal_ptr, cpu);
+ scs_free_percpu(&sdei_shadow_call_stack_critical_ptr, cpu);
+ }
+}
+
void scs_init_irq(void)
{
int cpu;

for_each_possible_cpu(cpu) {
-#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
- unsigned long *p;
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP))
+ WARN_ON(scs_alloc_percpu(&irq_shadow_call_stack_ptr,
+ cpu));
+ else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+ }
+}

- p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
- VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL,
- 0, cpu_to_node(cpu),
- __builtin_return_address(0));
+int scs_init_sdei(void)
+{
+ int cpu;

- per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
-#else
- per_cpu(irq_shadow_call_stack_ptr, cpu) =
- per_cpu(irq_shadow_call_stack, cpu);
-#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ if (!IS_ENABLED(CONFIG_ARM_SDE_INTERFACE))
+ return 0;
+
+ for_each_possible_cpu(cpu) {
+ if (IS_ENABLED(CONFIG_SHADOW_CALL_STACK_VMAP)) {
+ if (scs_alloc_percpu(
+ &sdei_shadow_call_stack_normal_ptr, cpu) ||
+ scs_alloc_percpu(
+ &sdei_shadow_call_stack_critical_ptr, cpu)) {
+ scs_free_sdei();
+ return -ENOMEM;
+ }
+ } else {
+ per_cpu(sdei_shadow_call_stack_normal_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_normal, cpu);
+ per_cpu(sdei_shadow_call_stack_critical_ptr, cpu) =
+ per_cpu(sdei_shadow_call_stack_critical, cpu);
+ }
}
+
+ return 0;
}
diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c
index d6259dac62b6..2854b9f7760a 100644
--- a/arch/arm64/kernel/sdei.c
+++ b/arch/arm64/kernel/sdei.c
@@ -13,6 +13,7 @@
#include <asm/kprobes.h>
#include <asm/mmu.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/sections.h>
#include <asm/stacktrace.h>
#include <asm/sysreg.h>
@@ -162,6 +163,12 @@ unsigned long sdei_arch_get_entry_point(int conduit)
return 0;
}

+ if (scs_init_sdei()) {
+ if (IS_ENABLED(CONFIG_VMAP_STACK))
+ free_sdei_stacks();
+ return 0;
+ }
+
sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC;

#ifdef CONFIG_UNMAP_KERNEL_AT_EL0
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 16:16:09

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 10/12] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.
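In C-like terms, the per-task switch added to cpu_switch_to() below boils
down to the following sketch (illustrative names only; the real code is the
three instructions added to entry.S, with x18 standing in for the reserved
register):

    struct ti_sketch { void *shadow_call_stack; };  /* stand-in for thread_info */

    static void sketch_scs_switch(struct ti_sketch *prev, struct ti_sketch *next,
                                  unsigned long *x18)
    {
            prev->shadow_call_stack = (void *)*x18;         /* str x18, [x0, #TSK_TI_SCS] */
            *x18 = (unsigned long)next->shadow_call_stack;  /* ldr x18, [x1, #TSK_TI_SCS] */
            next->shadow_call_stack = NULL;                 /* str xzr, [x1, #TSK_TI_SCS] */
    }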

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 37 +++++++++++++++++++++++++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 33 +++++++++++++++++++++--
arch/arm64/kernel/head.S | 8 ++++++
arch/arm64/kernel/irq.c | 2 ++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 40 ++++++++++++++++++++++++++++
arch/arm64/kernel/smp.c | 4 +++
11 files changed, 136 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 40fb05d96c60..c380a16533f6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -64,6 +64,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -1025,6 +1026,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..c50d2b0c6c5f
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+extern void scs_init_irq(void);
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_init_irq(void) {}
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 512174a8e789..1fb651f73da3 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e5b8ee31442..151f28521f1e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9981a0a5a87f..777a662888ec 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ddcde093c433..c33264ce7258 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -179,6 +179,11 @@ alternative_cb_end
apply_ssbd 1, x22, x23

ptrauth_keys_install_kernel tsk, 1, x20, x22, x23
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -280,6 +285,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -388,6 +399,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -405,15 +419,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ ldr_this_cpu x18, irq_shadow_call_stack_ptr, x26
+#endif
+
9998:
.endm

/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x18, x24
+#endif
.endm

/* GPRs used by entry code */
@@ -901,6 +925,11 @@ SYM_FUNC_START(cpu_switch_to)
mov sp, x9
msr sp_el0, x1
ptrauth_keys_install_kernel x1, 1, x8, x9, x10
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 57a91032b4c2..1514445bbccb 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -424,6 +424,10 @@ SYM_FUNC_START_LOCAL(__primary_switched)
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -737,6 +741,10 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c
index 04a327ccf84d..fe0ca522ff60 100644
--- a/arch/arm64/kernel/irq.c
+++ b/arch/arm64/kernel/irq.c
@@ -21,6 +21,7 @@
#include <linux/vmalloc.h>
#include <asm/daifflags.h>
#include <asm/vmap_stack.h>
+#include <asm/scs.h>

unsigned long irq_err_count;

@@ -63,6 +64,7 @@ static void init_irq_stacks(void)
void __init init_IRQ(void)
{
init_irq_stacks();
+ scs_init_irq();
irqchip_init();
if (!handle_arch_irq)
panic("No interrupt controller found.");
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 56be4cbf771f..a35d3318492c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -515,6 +516,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
entry_task_switch(next);
uao_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..eaadf5430baa
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <linux/vmalloc.h>
+#include <asm/pgtable.h>
+#include <asm/scs.h>
+
+DEFINE_PER_CPU(unsigned long *, irq_shadow_call_stack_ptr);
+
+#ifndef CONFIG_SHADOW_CALL_STACK_VMAP
+DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], irq_shadow_call_stack)
+ __aligned(SCS_SIZE);
+#endif
+
+void scs_init_irq(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+ unsigned long *p;
+
+ p = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL,
+ 0, cpu_to_node(cpu),
+ __builtin_return_address(0));
+
+ per_cpu(irq_shadow_call_stack_ptr, cpu) = p;
+#else
+ per_cpu(irq_shadow_call_stack_ptr, cpu) =
+ per_cpu(irq_shadow_call_stack, cpu);
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+ }
+}
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 061f60fe452f..1d112e34a636 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -46,6 +46,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -370,6 +371,9 @@ void cpu_die(void)
unsigned int cpu = smp_processor_id();
const struct cpu_operations *ops = get_cpu_ops(cpu);

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 16:16:30

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 04/12] scs: disable when function graph tracing is enabled

The mcount-based graph tracer hooks returns by modifying frame records
on the (regular) stack, but with SCS the return address is taken from
the shadow stack and the value in the frame record has no effect, so
the two are not compatible. The patchable-function-entry graph tracer
used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved
to the shadow stack, and is compatible.

Modifying the mcount-based graph tracer to work with SCS would require
a mechanism to determine the corresponding slot on the shadow stack
(and to pass this through the ftrace infrastructure). As we expect
everyone to eventually move to the patchable-function-entry based
graph tracer anyway, for now let's disable SCS when the mcount-based
graph tracer is enabled.

SCS and patchable-function-entry are both supported from LLVM 10.x.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 691a552c2cc3..c53cb9025ad2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -542,6 +542,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK

config SHADOW_CALL_STACK
bool "Clang Shadow Call Stack"
+ depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
help
This option enables Clang's Shadow Call Stack, which uses a
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 16:17:21

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 ++
arch/Kconfig | 34 ++++++
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/scs.h | 57 ++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
10 files changed, 314 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 70def4907036..baea6024b409 100644
--- a/Makefile
+++ b/Makefile
@@ -866,6 +866,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 786a85d4ad40..691a552c2cc3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -533,6 +533,40 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found in
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable of reading and writing arbitrary memory
+ may be able to locate them and hijack control flow by modifying
+ shadow stacks that are not currently in use.
+
+config SHADOW_CALL_STACK_VMAP
+ bool "Use virtually mapped shadow call stacks"
+ depends on SHADOW_CALL_STACK
+ help
+ Use virtually mapped shadow call stacks. Selecting this option
+ provides better stack exhaustion protection, but increases per-thread
+ memory consumption as a full page is allocated for each shadow stack.
+
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index e970f97a7fcb..97b62f47a80d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -193,6 +193,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..c5572fd770b0
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/*
+ * A random number outside the kernel's virtual address space to mark the
+ * end of the shadow stack.
+ */
+#define SCS_END_MAGIC 0xaf0194819b1635f6UL
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+extern void scs_task_reset(struct task_struct *tsk);
+extern int scs_prepare(struct task_struct *tsk, int node);
+extern bool scs_corrupted(struct task_struct *tsk);
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index bd403ed3e418..aaa71366d162 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -185,6 +186,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb4130ced32..c332eb9d4841 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index 4385f3d639f2..c4c984d29573 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -840,6 +843,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3a61a3b8eaa9..c99620c1ec20 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6045,6 +6046,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..28abed21950c
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/cpuhotplug.h>
+#include <linux/kasan.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/scs.h>
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize the risk of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline unsigned long *scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline void scs_set_magic(void *s)
+{
+ *scs_magic(s) = SCS_END_MAGIC;
+}
+
+#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
+
+/* Matches NR_CACHED_STACKS for VMAP_STACK */
+#define NR_CACHED_SCS 2
+static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
+
+static void *scs_alloc(int node)
+{
+ int i;
+ void *s;
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ s = this_cpu_xchg(scs_cache[i], NULL);
+ if (s) {
+ memset(s, 0, SCS_SIZE);
+ goto out;
+ }
+ }
+
+ /*
+ * We allocate a full page for the shadow stack, which should be
+ * more than we need. Check the assumption nevertheless.
+ */
+ BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
+
+ s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
+ VMALLOC_START, VMALLOC_END,
+ GFP_SCS, PAGE_KERNEL, 0,
+ node, __builtin_return_address(0));
+
+out:
+ if (s)
+ scs_set_magic(s);
+ /* TODO: poison for KASAN, unpoison in scs_free */
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ int i;
+
+ for (i = 0; i < NR_CACHED_SCS; i++)
+ if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
+ return;
+
+ vfree_atomic(s);
+}
+
+static int scs_cleanup(unsigned int cpu)
+{
+ int i;
+ void **cache = per_cpu_ptr(scs_cache, cpu);
+
+ for (i = 0; i < NR_CACHED_SCS; i++) {
+ vfree(cache[i]);
+ cache[i] = NULL;
+ }
+
+ return 0;
+}
+
+void __init scs_init(void)
+{
+ WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
+ scs_cleanup) < 0);
+}
+
+#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
+
+static struct kmem_cache *scs_cache;
+
+static inline void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ scs_set_magic(s);
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static inline void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+ WARN_ON(!scs_cache);
+}
+
+#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
+
+void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ task_set_scs(tsk, NULL);
+ scs_free(s);
+}
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 16:17:36

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 08/12] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 20:39:13

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 06/12] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 197a9ba2d5ea..ed15be0f8103 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -58,6 +58,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
SYM_FUNC_START(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -82,6 +84,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
SYM_FUNC_END(cpu_do_suspend)

@@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 20:39:13

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 02/12] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 20 ++++++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 42 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 10d7e818e118..502ab5447c8d 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..49768005a79e 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_BYTES) / 1024);
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b9de7d220fb..89aa96797743 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -156,6 +156,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_BYTES, /* measured in bytes */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 28abed21950c..5245e992c692 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -12,6 +12,7 @@
#include <linux/scs.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static inline void *__scs_base(struct task_struct *tsk)
@@ -89,6 +90,11 @@ static void scs_free(void *s)
vfree_atomic(s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return vmalloc_to_page(__scs_base(tsk));
+}
+
static int scs_cleanup(unsigned int cpu)
{
int i;
@@ -135,6 +141,11 @@ static inline void scs_free(void *s)
kmem_cache_free(scs_cache, s);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
void __init scs_init(void)
{
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
@@ -153,6 +164,12 @@ void scs_task_reset(struct task_struct *tsk)
task_set_scs(tsk, __scs_base(tsk));
}

+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_BYTES,
+ account * SCS_SIZE);
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -162,6 +179,8 @@ int scs_prepare(struct task_struct *tsk, int node)
return -ENOMEM;

task_set_scs(tsk, s);
+ scs_account(tsk, 1);
+
return 0;
}

@@ -182,6 +201,7 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
task_set_scs(tsk, NULL);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 69827d4fa052..721879d56bbd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5411,6 +5411,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5433,6 +5436,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_BYTES) / 1024,
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 96d21a792b57..089602efa477 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack_bytes",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 20:39:15

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 12/12] efi/libstub: disable SCS

Shadow stacks are not available in the EFI stub, filter out SCS flags.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Ard Biesheuvel <[email protected]>
---
drivers/firmware/efi/libstub/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 094eabdecfe6..b52ae8c29560 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
$(call cc-option,-fno-stack-protector) \
-D__DISABLE_EXPORTS

+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 20:40:50

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 09/12] arm64: disable SCS for hypervisor code

Disable SCS for code that runs at a different exception level by
adding __noscs to __hyp_text.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_hyp.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index fe57f60f06a8..875b106c5d98 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -13,7 +13,7 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>

-#define __hyp_text __section(.hyp.text) notrace
+#define __hyp_text __section(.hyp.text) notrace __noscs

#define read_sysreg_elx(r,nvh,vh) \
({ \
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-16 20:40:56

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v11 05/12] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 85e4149cc5d5..409a6c1be8cc 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -81,6 +81,10 @@ endif

KBUILD_CFLAGS += $(branch-prot-flags-y)

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-17 10:04:54

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v11 04/12] scs: disable when function graph tracing is enabled

On Thu, Apr 16, 2020 at 09:12:37AM -0700, Sami Tolvanen wrote:
> The graph tracer hooks returns by modifying frame records on the
> (regular) stack, but with SCS the return address is taken from the
> shadow stack, and the value in the frame record has no effect. As we
> don't currently have a mechanism to determine the corresponding slot
> on the shadow stack (and to pass this through the ftrace
> infrastructure), for now let's disable SCS when the graph tracer is
> enabled.
>
> With SCS the return address is taken from the shadow stack and the
> value in the frame record has no effect. The mcount based graph tracer
> hooks returns by modifying frame records on the (regular) stack, and
> thus is not compatible. The patchable-function-entry graph tracer
> used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved
> to the shadow stack, and is compatible.
>
> Modifying the mcount based graph tracer to work with SCS would require
> a mechanism to determine the corresponding slot on the shadow stack
> (and to pass this through the ftrace infrastructure), and we expect
> that everyone will eventually move to the patchable-function-entry
> based graph tracer anyway, so for now let's disable SCS when the
> mcount-based graph tracer is enabled.
>
> SCS and patchable-function-entry are both supported from LLVM 10.x.

SCS would actually provide another way to do return hooking. An arguably
much saner model at that.

The 'normal' way is to (temporarily) replace the on-stack return value,
and then replace it back in the return handler. This is because we can't
simply push a fake return on the stack, because that would wreck the
expected stack layout of the regular function.

But there is nothing that would stop us from pushing an extra entry on
the SCS. It would in fact be a much cleaner solution. The entry hook
sticks an extra entry on the SCS, the function ignores what's on the
normal stack and pops from the SCS, we return to the exit handler, which
in turn pops from the SCS stack at which point we're back to regular.

The only 'funny' is that the exit handler itself should not push to the
SCS, or we should frob the return-to-exit-handler such that it lands
after the push.
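
As a rough sketch of that model (purely illustrative; these helpers and
the calling convention are hypothetical, not existing ftrace code):

	/*
	 * On entry to an instrumented function the SCS already holds the
	 * real return address pushed by the prologue:
	 *
	 *	scs: [ ..., real_ret ]
	 *
	 * The entry hook pushes one extra slot:
	 *
	 *	scs: [ ..., real_ret, exit_handler ]
	 *
	 * The epilogue pops exit_handler and "returns" into it; the exit
	 * handler then pops real_ret and returns there, leaving the SCS
	 * exactly as it was before the call.
	 */
	static void scs_hook_entry(unsigned long **scs_sp, unsigned long exit_handler)
	{
		*(*scs_sp)++ = exit_handler;
	}

	static unsigned long scs_hook_exit(unsigned long **scs_sp)
	{
		return *--(*scs_sp);	/* the original caller */
	}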

> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Mark Rutland <[email protected]>
> ---
> arch/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 691a552c2cc3..c53cb9025ad2 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -542,6 +542,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
>
> config SHADOW_CALL_STACK
> bool "Clang Shadow Call Stack"
> + depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
> depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> help
> This option enables Clang's Shadow Call Stack, which uses a

AFAICT you also need to kill KRETPROBES, which plays similar games. And
doesn't BPF also do stuff like this?

2020-04-17 14:48:17

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v11 04/12] scs: disable when function graph tracing is enabled

Hi Peter,

On Fri, Apr 17, 2020 at 12:00:39PM +0200, Peter Zijlstra wrote:
> On Thu, Apr 16, 2020 at 09:12:37AM -0700, Sami Tolvanen wrote:
> > The graph tracer hooks returns by modifying frame records on the
> > (regular) stack, but with SCS the return address is taken from the
> > shadow stack, and the value in the frame record has no effect. As we
> > don't currently have a mechanism to determine the corresponding slot
> > on the shadow stack (and to pass this through the ftrace
> > infrastructure), for now let's disable SCS when the graph tracer is
> > enabled.
> >
> > With SCS the return address is taken from the shadow stack and the
> > value in the frame record has no effect. The mcount based graph tracer
> > hooks returns by modifying frame records on the (regular) stack, and
> > thus is not compatible. The patchable-function-entry graph tracer
> > used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved
> > to the shadow stack, and is compatible.
> >
> > Modifying the mcount based graph tracer to work with SCS would require
> > a mechanism to determine the corresponding slot on the shadow stack
> > (and to pass this through the ftrace infrastructure), and we expect
> > that everyone will eventually move to the patchable-function-entry
> > based graph tracer anyway, so for now let's disable SCS when the
> > mcount-based graph tracer is enabled.
> >
> > SCS and patchable-function-entry are both supported from LLVM 10.x.
>
> SCS would actually provide another way to do return hooking. An arguably
> much saner model at that.
>
> The 'normal' way is to (temporarily) replace the on-stack return value,
> and then replace it back in the return handler. This is because we can't
> simply push a fake return on the stack, because that would wreck the
> expected stack layout of the regular function.
>
> But there is nothing that would stop us from pushing an extra entry on
> the SCS. It would in fact be a much cleaner solution. The entry hook
> sticks an extra entry on the SCS, the function ignores what's on the
> normal stack and pops from the SCS, we return to the exit handler, which
> in turn pops from the SCS stack at which point we're back to regular.

For background: on arm64 we wanted to use DYNAMIC_FTRACE_WITH_REGS since
we already have to use that to handle pointer authentication, and didn't
want to gain more ways of implementing ftrace.

Arguably we should move the dependency into the arm64 Kconfig for
ARCH_SUPPORTS_SHADOW_CALL_STACK.

> The only 'funny' is that the exit handler itself should not push to the
> SCS, or we should frob the return-to-exit-handler such that it lands
> after the push.
>
> > Signed-off-by: Sami Tolvanen <[email protected]>
> > Reviewed-by: Kees Cook <[email protected]>
> > Reviewed-by: Mark Rutland <[email protected]>
> > ---
> > arch/Kconfig | 1 +
> > 1 file changed, 1 insertion(+)
> >
> > diff --git a/arch/Kconfig b/arch/Kconfig
> > index 691a552c2cc3..c53cb9025ad2 100644
> > --- a/arch/Kconfig
> > +++ b/arch/Kconfig
> > @@ -542,6 +542,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
> >
> > config SHADOW_CALL_STACK
> > bool "Clang Shadow Call Stack"
> > + depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
> > depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> > help
> > This option enables Clang's Shadow Call Stack, which uses a

> AFAICT you also need to kill KRETPROBES, which plays similar games.

Hmm... how does KRETPROBES work? If you can only mess with the return
address when probing the first instruction in the function, it'll just
work for SCS or pointer authentication, as the LR is used at that
instant. If KRETPROBES tries to mess with the return address elsewhere
it'd be broken today...

> And doesn't BPF also do stuff like this?

Can BPF mess with return addresses now!?

Thanks,
Mark.

2020-04-17 15:29:39

by Peter Zijlstra

[permalink] [raw]
Subject: Re: [PATCH v11 04/12] scs: disable when function graph tracing is enabled

On Fri, Apr 17, 2020 at 03:46:21PM +0100, Mark Rutland wrote:
> > > diff --git a/arch/Kconfig b/arch/Kconfig
> > > index 691a552c2cc3..c53cb9025ad2 100644
> > > --- a/arch/Kconfig
> > > +++ b/arch/Kconfig
> > > @@ -542,6 +542,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
> > >
> > > config SHADOW_CALL_STACK
> > > bool "Clang Shadow Call Stack"
> > > + depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
> > > depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> > > help
> > > This option enables Clang's Shadow Call Stack, which uses a
>
> > AFAICT you also need to kill KRETPROBES, which plays similar games.
>
> Hmm... how does KREPROBES work? If you can only mess with the return
> address when probing the first instruction in the function, it'll just
> work for SCS or pointer authentication, as the LR is used at that
> instant. If KRETPROBES tries to mess with the return address elsewhere
> it'd be broken today...

To be fair, I've not looked at the arm64 implementation. x86 does gross
things like ftrace does. On x86 ftrace_graph and kretprobe also can't
be on at the same time for the same function, there's some yuck around
there.

Rostedt was recently talking about cleaning some of that up.

But if kretprobe can work on arm64, then ftrace_graph can too, but I
think that links back to what you said earlier, you didn't want more
ftrace variants or something.

> > And doesn't BPF also do stuff like this?
>
> Can BPF mess with return addresses now!?

At least on x86 I think it does. But what do I know, I can't operate
that stuff. Rostedt might know.

2020-04-17 15:50:05

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v11 04/12] scs: disable when function graph tracing is enabled

On Fri, Apr 17, 2020 at 05:26:45PM +0200, Peter Zijlstra wrote:
> On Fri, Apr 17, 2020 at 03:46:21PM +0100, Mark Rutland wrote:
> > > > diff --git a/arch/Kconfig b/arch/Kconfig
> > > > index 691a552c2cc3..c53cb9025ad2 100644
> > > > --- a/arch/Kconfig
> > > > +++ b/arch/Kconfig
> > > > @@ -542,6 +542,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
> > > >
> > > > config SHADOW_CALL_STACK
> > > > bool "Clang Shadow Call Stack"
> > > > + depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
> > > > depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> > > > help
> > > > This option enables Clang's Shadow Call Stack, which uses a
> >
> > > AFAICT you also need to kill KRETPROBES, which plays similar games.
> >
> > Hmm... how does KREPROBES work? If you can only mess with the return
> > address when probing the first instruction in the function, it'll just
> > work for SCS or pointer authentication, as the LR is used at that
> > instant. If KRETPROBES tries to mess with the return address elsewhere
> > it'd be broken today...
>
> To be fair, I've not looked at the arm64 implementation. x86 does gross
> things like ftrace does. On x86 ftrace_graph and kretprobe also can't
> be on at the same time for the same function, there's some yuck around
> there.

I can imagine the same holds true for us there.

> Rostedt was recently talking about cleaning some of that up.
>
> But if kretprobe can work on arm64, then ftrace_graph can too, but I
> think that links back to what you said earlier, you didn't want more
> ftrace variants or something.

I just want to avoid yet another implementation of the underlying
mechanism. For DYNAMIC_FTRACE_WITH_REGS we can mess with the LR before
pauth or SCS sees it, so those definitely work.

If KRETPROBES works by messing with the LR at the instant the function
is entered, that should work similarly. If it works by replacing the
RET it should also work out since any pauth/SCS work will have been
undone by that point. If it attempts to mess with the return address in
the middle of a function then it's not reliable today.

I'll take a look, since:

> > > And doesn't BPF also do stuff like this?
> >
> > Can BPF mess with return addresses now!?
>
> At least on x86 I think it does. But what do I know, I can't operate
> that stuff. Rostedt might know.

Sounds like I might need to do some digging...

Mark.

2020-04-17 23:21:18

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v11 04/12] scs: disable when function graph tracing is enabled

On Fri, Apr 17, 2020 at 04:46:14PM +0100, Mark Rutland wrote:
> If KRETPROBES works by messing with the LR at the instant the function
> is entered, that should work similarly. If it works by replacing the
> RET it should also work out since any pauth/SCS work will have been
> undone by that point. If it attempts to mess with the return address in
> the middle of a function then it's not reliable today.

I did initially have a patch to disable kretprobes (until v5), but as
Mark pointed out back then, the return address is modified before it
gets pushed to the shadow stack, so there was no conflict with SCS. I
confirmed this on arm64, but haven't looked at other architectures.

Sami

2020-04-20 17:19:21

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v11 03/12] scs: add support for stack usage debugging

On Thu, Apr 16, 2020 at 09:12:36AM -0700, Sami Tolvanen wrote:
> Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
> also prints out the highest shadow stack usage per process.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> kernel/scs.c | 39 +++++++++++++++++++++++++++++++++++++++
> 1 file changed, 39 insertions(+)
>
> diff --git a/kernel/scs.c b/kernel/scs.c
> index 5245e992c692..ad74d13f2c0f 100644
> --- a/kernel/scs.c
> +++ b/kernel/scs.c
> @@ -184,6 +184,44 @@ int scs_prepare(struct task_struct *tsk, int node)
> return 0;
> }
>
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> +static inline unsigned long scs_used(struct task_struct *tsk)
> +{
> + unsigned long *p = __scs_base(tsk);
> + unsigned long *end = scs_magic(p);
> + unsigned long s = (unsigned long)p;
> +
> + while (p < end && READ_ONCE_NOCHECK(*p))
> + p++;

I think the expectation is that the caller has already checked that the
stack is not corrupted, so I'd probably throw a couple of underscores
in front of the function name, along with a comment.

Also, is tsk ever != current?

> +
> + return (unsigned long)p - s;
> +}
> +
> +static void scs_check_usage(struct task_struct *tsk)
> +{
> + static DEFINE_SPINLOCK(lock);
> + static unsigned long highest;
> + unsigned long used = scs_used(tsk);
> +
> + if (used <= highest)
> + return;
> +
> + spin_lock(&lock);
> +
> + if (used > highest) {
> + pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
> + tsk->comm, task_pid_nr(tsk), used);
> + highest = used;
> + }
> +
> + spin_unlock(&lock);

Do you really need this lock? I'd have thought you could cmpxchg()
highest instead.

Will

2020-04-20 17:20:01

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v11 02/12] scs: add accounting

On Thu, Apr 16, 2020 at 09:12:35AM -0700, Sami Tolvanen wrote:
> This change adds accounting for the memory allocated for shadow stacks.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> drivers/base/node.c | 6 ++++++
> fs/proc/meminfo.c | 4 ++++
> include/linux/mmzone.h | 3 +++
> kernel/scs.c | 20 ++++++++++++++++++++
> mm/page_alloc.c | 6 ++++++
> mm/vmstat.c | 3 +++
> 6 files changed, 42 insertions(+)
>
> diff --git a/drivers/base/node.c b/drivers/base/node.c
> index 10d7e818e118..502ab5447c8d 100644
> --- a/drivers/base/node.c
> +++ b/drivers/base/node.c
> @@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
> "Node %d AnonPages: %8lu kB\n"
> "Node %d Shmem: %8lu kB\n"
> "Node %d KernelStack: %8lu kB\n"
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + "Node %d ShadowCallStack:%8lu kB\n"
> +#endif
> "Node %d PageTables: %8lu kB\n"
> "Node %d NFS_Unstable: %8lu kB\n"
> "Node %d Bounce: %8lu kB\n"
> @@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
> nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
> nid, K(i.sharedram),
> nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
> +#ifdef CONFIG_SHADOW_CALL_STACK
> + nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
> +#endif

Why not just use KB everywhere instead of repeated division by 1024?

Will

2020-04-20 17:21:01

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

Hi Sami,

Comments inline.

On Thu, Apr 16, 2020 at 09:12:34AM -0700, Sami Tolvanen wrote:
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> Note that security guarantees in the kernel differ from the
> ones documented for user space. The kernel must store addresses
> of shadow stacks used by other tasks and interrupt handlers in
> memory, which means an attacker capable of reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Miguel Ojeda <[email protected]>
> ---
> Makefile | 6 ++
> arch/Kconfig | 34 ++++++
> include/linux/compiler-clang.h | 6 ++
> include/linux/compiler_types.h | 4 +
> include/linux/scs.h | 57 ++++++++++
> init/init_task.c | 8 ++
> kernel/Makefile | 1 +
> kernel/fork.c | 9 ++
> kernel/sched/core.c | 2 +
> kernel/scs.c | 187 +++++++++++++++++++++++++++++++++
> 10 files changed, 314 insertions(+)
> create mode 100644 include/linux/scs.h
> create mode 100644 kernel/scs.c
>
> diff --git a/Makefile b/Makefile
> index 70def4907036..baea6024b409 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -866,6 +866,12 @@ ifdef CONFIG_LIVEPATCH
> KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
> endif
>
> +ifdef CONFIG_SHADOW_CALL_STACK
> +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> +export CC_FLAGS_SCS
> +endif

CFLAGS_SCS would seem more natural to me, although I see ftrace does it this
way.

> # arch Makefile may override CC so keep this after arch Makefile is included
> NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 786a85d4ad40..691a552c2cc3 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -533,6 +533,40 @@ config STACKPROTECTOR_STRONG
> about 20% of all kernel functions, which increases the kernel code
> size by about 2%.
>
> +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> + bool
> + help
> + An architecture should select this if it supports Clang's Shadow
> + Call Stack, has asm/scs.h, and implements runtime support for shadow
> + stack switching.
> +
> +config SHADOW_CALL_STACK
> + bool "Clang Shadow Call Stack"
> + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> + help
> + This option enables Clang's Shadow Call Stack, which uses a
> + shadow stack to protect function return addresses from being
> + overwritten by an attacker. More information can be found in
> + Clang's documentation:
> +
> + https://clang.llvm.org/docs/ShadowCallStack.html
> +
> + Note that security guarantees in the kernel differ from the ones
> + documented for user space. The kernel must store addresses of shadow
> + stacks used by other tasks and interrupt handlers in memory, which
> + means an attacker capable of reading and writing arbitrary memory
> + may be able to locate them and hijack control flow by modifying
> + shadow stacks that are not currently in use.

Shouldn't some of this depend on CC_IS_CLANG?

> +config SHADOW_CALL_STACK_VMAP
> + bool "Use virtually mapped shadow call stacks"
> + depends on SHADOW_CALL_STACK
> + help
> + Use virtually mapped shadow call stacks. Selecting this option
> + provides better stack exhaustion protection, but increases per-thread
> + memory consumption as a full page is allocated for each shadow stack.

Given that this feature applies only to arm64 kernels built with clang, it
feels weird to further segment that userbase with another config option.
Does Android enable SHADOW_CALL_STACK_VMAP? If not, maybe we should ditch
it for now and add it when we have a user.

> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index e970f97a7fcb..97b62f47a80d 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -193,6 +193,10 @@ struct ftrace_likely_data {
> # define randomized_struct_fields_end
> #endif
>
> +#ifndef __noscs
> +# define __noscs
> +#endif
> +
> #ifndef asm_volatile_goto
> #define asm_volatile_goto(x...) asm goto(x)
> #endif
> diff --git a/include/linux/scs.h b/include/linux/scs.h
> new file mode 100644
> index 000000000000..c5572fd770b0
> --- /dev/null
> +++ b/include/linux/scs.h
> @@ -0,0 +1,57 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#ifndef _LINUX_SCS_H
> +#define _LINUX_SCS_H
> +
> +#include <linux/gfp.h>
> +#include <linux/sched.h>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +
> +/*
> + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
> + * architecture) provided ~40% safety margin on stack usage while keeping
> + * memory allocation overhead reasonable.
> + */
> +#define SCS_SIZE 1024UL
> +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
> +
> +/*
> + * A random number outside the kernel's virtual address space to mark the
> + * end of the shadow stack.
> + */
> +#define SCS_END_MAGIC 0xaf0194819b1635f6UL

This seems like it might be arm64-specific. Why not choose something based
off CONFIG_ILLEGAL_POINTER_VALUE (see linux/poison.h)?

> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 3a61a3b8eaa9..c99620c1ec20 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -11,6 +11,7 @@
> #include <linux/nospec.h>
>
> #include <linux/kcov.h>
> +#include <linux/scs.h>
>
> #include <asm/switch_to.h>
> #include <asm/tlb.h>
> @@ -6045,6 +6046,7 @@ void init_idle(struct task_struct *idle, int cpu)
> idle->se.exec_start = sched_clock();
> idle->flags |= PF_IDLE;
>
> + scs_task_reset(idle);
> kasan_unpoison_task_stack(idle);
>
> #ifdef CONFIG_SMP
> diff --git a/kernel/scs.c b/kernel/scs.c
> new file mode 100644
> index 000000000000..28abed21950c
> --- /dev/null
> +++ b/kernel/scs.c
> @@ -0,0 +1,187 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#include <linux/cpuhotplug.h>
> +#include <linux/kasan.h>
> +#include <linux/mm.h>
> +#include <linux/mmzone.h>
> +#include <linux/scs.h>
> +#include <linux/slab.h>
> +#include <linux/vmalloc.h>
> +#include <asm/scs.h>
> +
> +static inline void *__scs_base(struct task_struct *tsk)

Please avoid using 'inline' in C files unless there's a good reason not
to let the compiler figure it out.

> +{
> + /*
> + * To minimize risk the of exposure, architectures may clear a

Should be "the risk of exposure".

> + * task's thread_info::shadow_call_stack while that task is
> + * running, and only save/restore the active shadow call stack
> + * pointer when the usual register may be clobbered (e.g. across
> + * context switches).
> + *
> + * The shadow call stack is aligned to SCS_SIZE, and grows
> + * upwards, so we can mask out the low bits to extract the base
> + * when the task is not running.
> + */
> + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));

Could we avoid forcing this alignment if we stored the SCS pointer as a
(base,offset) pair instead? That might be friendlier on the allocations
later on.

> +}
> +
> +static inline unsigned long *scs_magic(void *s)
> +{
> + return (unsigned long *)(s + SCS_SIZE) - 1;
> +}
> +
> +static inline void scs_set_magic(void *s)
> +{
> + *scs_magic(s) = SCS_END_MAGIC;

You added task_set_scs() for this sort of thing, so I'm not convinced you
need this extra helper.

> +}
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK_VMAP
> +
> +/* Matches NR_CACHED_STACKS for VMAP_STACK */
> +#define NR_CACHED_SCS 2

Then they should probably both be derived from the same thing?

> +static DEFINE_PER_CPU(void *, scs_cache[NR_CACHED_SCS]);
> +
> +static void *scs_alloc(int node)
> +{
> + int i;
> + void *s;
> +
> + for (i = 0; i < NR_CACHED_SCS; i++) {
> + s = this_cpu_xchg(scs_cache[i], NULL);

Might be worth a comment about the re-entrancy here.

> + if (s) {
> + memset(s, 0, SCS_SIZE);
> + goto out;
> + }
> + }
> +
> + /*
> + * We allocate a full page for the shadow stack, which should be
> + * more than we need. Check the assumption nevertheless.
> + */
> + BUILD_BUG_ON(SCS_SIZE > PAGE_SIZE);
> +
> + s = __vmalloc_node_range(PAGE_SIZE, SCS_SIZE,
> + VMALLOC_START, VMALLOC_END,
> + GFP_SCS, PAGE_KERNEL, 0,
> + node, __builtin_return_address(0));
> +
> +out:
> + if (s)
> + scs_set_magic(s);
> + /* TODO: poison for KASAN, unpoison in scs_free */

We don't usually commit these. What's missing?

> +
> + return s;
> +}
> +
> +static void scs_free(void *s)
> +{
> + int i;
> +
> + for (i = 0; i < NR_CACHED_SCS; i++)
> + if (this_cpu_cmpxchg(scs_cache[i], 0, s) == NULL)
> + return;
> +
> + vfree_atomic(s);
> +}
> +
> +static int scs_cleanup(unsigned int cpu)
> +{
> + int i;
> + void **cache = per_cpu_ptr(scs_cache, cpu);
> +
> + for (i = 0; i < NR_CACHED_SCS; i++) {
> + vfree(cache[i]);
> + cache[i] = NULL;
> + }

Hmm, can this run concurrently with another CPU doing a stack allocation
with this_cpu_cmpxchg()? It probably works out on arm64 thanks to the use
of atomics, but we shouldn't be relying on that in core code.

> +
> + return 0;
> +}
> +
> +void __init scs_init(void)
> +{
> + WARN_ON(cpuhp_setup_state(CPUHP_BP_PREPARE_DYN, "scs:scs_cache", NULL,
> + scs_cleanup) < 0);
> +}
> +
> +#else /* !CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +static struct kmem_cache *scs_cache;
> +
> +static inline void *scs_alloc(int node)
> +{
> + void *s;
> +
> + s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
> + if (s) {
> + scs_set_magic(s);
> + /*
> + * Poison the allocation to catch unintentional accesses to
> + * the shadow stack when KASAN is enabled.
> + */
> + kasan_poison_object_data(scs_cache, s);
> + }
> +
> + return s;
> +}
> +
> +static inline void scs_free(void *s)
> +{
> + kasan_unpoison_object_data(scs_cache, s);
> + kmem_cache_free(scs_cache, s);
> +}
> +
> +void __init scs_init(void)
> +{
> + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
> + 0, NULL);
> + WARN_ON(!scs_cache);

Memory allocation failure should be noisy enough without this.

> +}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK_VMAP */
> +
> +void scs_task_reset(struct task_struct *tsk)
> +{
> + /*
> + * Reset the shadow stack to the base address in case the task
> + * is reused.
> + */
> + task_set_scs(tsk, __scs_base(tsk));
> +}

Why isn't this in the header?

> +
> +int scs_prepare(struct task_struct *tsk, int node)
> +{
> + void *s;
> +
> + s = scs_alloc(node);
> + if (!s)
> + return -ENOMEM;
> +
> + task_set_scs(tsk, s);
> + return 0;
> +}
> +
> +bool scs_corrupted(struct task_struct *tsk)
> +{
> + unsigned long *magic = scs_magic(__scs_base(tsk));
> +
> + return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
> +}

Same here.

> +
> +void scs_release(struct task_struct *tsk)
> +{
> + void *s;
> +
> + s = __scs_base(tsk);
> + if (!s)
> + return;
> +
> + WARN_ON(scs_corrupted(tsk));
> +
> + task_set_scs(tsk, NULL);

Aren't we about to free the task here? What does clearing the scs pointer
achieve?

Will

2020-04-20 19:24:44

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v11 04/12] scs: disable when function graph tracing is enabled

On Fri, 17 Apr 2020 16:46:14 +0100
Mark Rutland <[email protected]> wrote:

> > > > And doesn't BPF also do stuff like this?
> > >
> > > Can BPF mess with return addresses now!?
> >
> > At least on x86 I think it does. But what do I know, I can't operate
> > that stuff. Rostedt might know.
>
> Sounds like I might need to do some digging...

May want to ping Alexei. It appears that if BPF adds a direct hook to a
function, it will prevent the function graph tracer from tracing it. :-/

-- Steve

2020-04-20 21:20:34

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, Apr 20, 2020 at 06:17:28PM +0100, Will Deacon wrote:
> > +ifdef CONFIG_SHADOW_CALL_STACK
> > +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> > +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> > +export CC_FLAGS_SCS
> > +endif
>
> CFLAGS_SCS would seem more natural to me, although I see ftrace does it this
> way.

Right, I followed ftrace's example here.

> > +config SHADOW_CALL_STACK
> > + bool "Clang Shadow Call Stack"
> > + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK
> > + help
> > + This option enables Clang's Shadow Call Stack, which uses a
> > + shadow stack to protect function return addresses from being
> > + overwritten by an attacker. More information can be found in
> > + Clang's documentation:
> > +
> > + https://clang.llvm.org/docs/ShadowCallStack.html
> > +
> > + Note that security guarantees in the kernel differ from the ones
> > + documented for user space. The kernel must store addresses of shadow
> > + stacks used by other tasks and interrupt handlers in memory, which
> > + means an attacker capable of reading and writing arbitrary memory
> > + may be able to locate them and hijack control flow by modifying
> > + shadow stacks that are not currently in use.
>
> Shouldn't some of this depend on CC_IS_CLANG?

Sure, I'll add CC_IS_CLANG here in the next version. Note that we do
check for compiler support before selecting ARCH_SUPPORTS_SHADOW_CALL_STACK.
The flags are architecture-specific, so the check is done in the arch Kconfig.

> > +config SHADOW_CALL_STACK_VMAP
> > + bool "Use virtually mapped shadow call stacks"
> > + depends on SHADOW_CALL_STACK
> > + help
> > + Use virtually mapped shadow call stacks. Selecting this option
> > + provides better stack exhaustion protection, but increases per-thread
> > + memory consumption as a full page is allocated for each shadow stack.
>
> Given that this feature applies only to arm64 kernels built with clang, it
> feels weird to further segment that userbase with another config option.
> Does Android enable SHADOW_CALL_STACK_VMAP? If not, maybe we should ditch
> it for now and add it when we have a user.

Android doesn't enable the VMAP option right now due to increased memory
overhead. I'll drop it from v12.

> > +/*
> > + * A random number outside the kernel's virtual address space to mark the
> > + * end of the shadow stack.
> > + */
> > +#define SCS_END_MAGIC 0xaf0194819b1635f6UL
>
> This seems like it might be arm64-specific. Why not choose something based
> off CONFIG_ILLEGAL_POINTER_VALUE (see linux/poison.h)?

Sure, I'll use POISON_POINTER_DELTA here.
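
Something along these lines, I think (the exact constant below is only
illustrative):

	#include <linux/poison.h>

	/* A non-canonical address based on the pointer poison delta. */
	#define SCS_END_MAGIC	(0x5f6UL + POISON_POINTER_DELTA)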

> > +static inline void *__scs_base(struct task_struct *tsk)
>
> Please avoid using 'inline' in C files unless there's a good reason not
> to let the compiler figure it out.

Ack.

> > +{
> > + /*
> > + * To minimize risk the of exposure, architectures may clear a
>
> Should be "the risk of exposure".

Thanks.

> > + * The shadow call stack is aligned to SCS_SIZE, and grows
> > + * upwards, so we can mask out the low bits to extract the base
> > + * when the task is not running.
> > + */
> > + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
>
> Could we avoid forcing this alignment if we stored the SCS pointer as a
> (base,offset) pair instead? That might be friendlier on the allocations
> later on.

The idea is to avoid storing the current task's shadow stack address in
memory, which is why I would rather not store the base address either.

> > +static inline void scs_set_magic(void *s)
> > +{
> > + *scs_magic(s) = SCS_END_MAGIC;
>
> You added task_set_scs() for this sort of thing, so I'm not convinced you
> need this extra helper.

Agreed, I'll drop this.

> > + if (s)
> > + scs_set_magic(s);
> > + /* TODO: poison for KASAN, unpoison in scs_free */
>
> We don't usually commit these. What's missing?

At the time, KASAN didn't support poisoning vmalloc'ed memory, but looks
like that was fixed a while back.

> > +static int scs_cleanup(unsigned int cpu)
> > +{
> > + int i;
> > + void **cache = per_cpu_ptr(scs_cache, cpu);
> > +
> > + for (i = 0; i < NR_CACHED_SCS; i++) {
> > + vfree(cache[i]);
> > + cache[i] = NULL;
> > + }
>
> Hmm, can this run concurrently with another CPU doing a stack allocation
> with this_cpu_cmpxchg()? It probably works out on arm64 thanks to the use
> of atomics, but we shouldn't be relying on that in core code.

This is essentially identical to the code in kernel/fork.c. Anyway, all
of this code goes away with the VMAP option.

> > +void __init scs_init(void)
> > +{
> > + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
> > + 0, NULL);
> > + WARN_ON(!scs_cache);
>
> Memory allocation failure should be noisy enough without this.

Sure, I'll remove the warning.

> > +void scs_task_reset(struct task_struct *tsk)
> > +{
> > + /*
> > + * Reset the shadow stack to the base address in case the task
> > + * is reused.
> > + */
> > + task_set_scs(tsk, __scs_base(tsk));
> > +}
>
> Why isn't this in the header?

> > +bool scs_corrupted(struct task_struct *tsk)
> > +{
> > + unsigned long *magic = scs_magic(__scs_base(tsk));
> > +
> > + return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
> > +}
>
> Same here.

I'll move both to the header file.
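
Roughly like this in <linux/scs.h> (a sketch; exact naming to be
confirmed in v12, with __scs_base() and __scs_magic() moving along):

	static inline void scs_task_reset(struct task_struct *tsk)
	{
		/* Reset the shadow stack to the base in case the task is reused. */
		task_set_scs(tsk, __scs_base(tsk));
	}

	static inline bool scs_corrupted(struct task_struct *tsk)
	{
		unsigned long *magic = __scs_magic(__scs_base(tsk));

		return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
	}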

> > +void scs_release(struct task_struct *tsk)
> > +{
> > + void *s;
> > +
> > + s = __scs_base(tsk);
> > + if (!s)
> > + return;
> > +
> > + WARN_ON(scs_corrupted(tsk));
> > +
> > + task_set_scs(tsk, NULL);
>
> Aren't we about to free the task here? What does clearing the scs pointer
> achieve?

True, it doesn't achieve much, only leaves one fewer shadow stack pointer
in memory. I'll drop this from the next version.

Sami

2020-04-20 21:25:20

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v11 02/12] scs: add accounting

On Mon, Apr 20, 2020 at 06:17:55PM +0100, Will Deacon wrote:
> > +#ifdef CONFIG_SHADOW_CALL_STACK
> > + nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_BYTES) / 1024,
> > +#endif
>
> Why not just use KB everywhere instead of repeated division by 1024?

This was to correctly calculate memory usage with shadow stacks <1024
bytes. I don't think we need that anymore, so I'll change this to _KB in
the next version.
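
In other words, count KiB directly in the zone counter, roughly (sketch
only; NR_KERNEL_SCS_KB being the renamed counter):

	static void scs_account(struct task_struct *tsk, int account)
	{
		mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_KB,
				    account * SCS_SIZE / 1024);
	}

The meminfo and node readers can then print the value as-is instead of
dividing by 1024.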

Sami

2020-04-20 22:26:01

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v11 03/12] scs: add support for stack usage debugging

On Mon, Apr 20, 2020 at 06:17:42PM +0100, Will Deacon wrote:
> > +#ifdef CONFIG_DEBUG_STACK_USAGE
> > +static inline unsigned long scs_used(struct task_struct *tsk)
> > +{
> > + unsigned long *p = __scs_base(tsk);
> > + unsigned long *end = scs_magic(p);
> > + unsigned long s = (unsigned long)p;
> > +
> > + while (p < end && READ_ONCE_NOCHECK(*p))
> > + p++;
>
> I think the expectation is that the caller has already checked that the
> stack is not corrupted, so I'd probably throw a couple of underscores
> in front of the function name, along with a comment.

Correct. I'll do that.

> Also, is tsk ever != current?

This is only called from scs_release(), so tsk is never current.

> > +static void scs_check_usage(struct task_struct *tsk)
> > +{
> > + static DEFINE_SPINLOCK(lock);
> > + static unsigned long highest;
> > + unsigned long used = scs_used(tsk);
> > +
> > + if (used <= highest)
> > + return;
> > +
> > + spin_lock(&lock);
> > +
> > + if (used > highest) {
> > + pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
> > + tsk->comm, task_pid_nr(tsk), used);
> > + highest = used;
> > + }
> > +
> > + spin_unlock(&lock);
>
> Do you really need this lock? I'd have thought you could cmpxchg()
> highest instead.

This is similar to check_stack_usage in kernel/exit.c, but yes, I can
change this to a cmpxchg() loop instead.
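
Something like this, for example (sketch only, still under
CONFIG_DEBUG_STACK_USAGE; __scs_used() is the underscored helper you
suggested above):

	static void scs_check_usage(struct task_struct *tsk)
	{
		static unsigned long highest;
		unsigned long used = __scs_used(tsk);
		unsigned long prev, curr = highest;

		while (used > curr) {
			prev = cmpxchg(&highest, curr, used);

			if (prev == curr) {
				pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
					tsk->comm, task_pid_nr(tsk), used);
				break;
			}

			curr = prev;
		}
	}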

Sami

2020-04-21 01:14:31

by Steven Rostedt

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, 20 Apr 2020 18:17:28 +0100
Will Deacon <[email protected]> wrote:

> > +ifdef CONFIG_SHADOW_CALL_STACK
> > +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> > +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> > +export CC_FLAGS_SCS
> > +endif
>
> CFLAGS_SCS would seem more natural to me, although I see ftrace does it this
> way.

The CC_FLAGS_FTRACE was added by Heiko Carstens, and the "CC_FLAGS_"
appears to be a common usage in s390 :-)

That said, I like the CC_FLAGS_ notation, because the Linux build
system uses CFLAGS_* as commands:

CFLAGS_foo.o = x
CFLAGS_REMOVE_foo.o = y

And "CC_FLAGS_" is only for new flags and easy to search for.

-- Steve

2020-04-21 02:16:23

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 00/12] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks used by inactive tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is incompatible with features
that rely on modifying function return addresses in the kernel
stack to alter control flow. A copy of the return address is
still kept in the kernel stack for compatibility with stack
unwinding, for example.

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v12:
- Removed CONFIG_SHADOW_CALL_STACK_VMAP.
- Added CC_IS_CLANG as a dependency to CONFIG_SHADOW_CALL_STACK.
- Changed SCS_END_MAGIC to use POISON_POINTER_DELTA.
- Removed the unnecessary scs_set_magic() helper function.
- Moved scs_task_reset() and scs_corrupted() to scs.h, along with
__scs_magic() and __scs_base().
- Removed a redundant warning from memory allocation.
- Removed an unnecessary task_set_scs() call from scs_release().
- Changed the accounting code to calculate KiB instead of bytes.
- Replaced the lock in scs_check_usage() with a cmpxchg() loop.

Changes in v11:
- Rebased, added maintainers for kernel/ changes.

Changes in v10:
- Removed an unnecessary <asm/scs.h> include from head.S.

Changes in v9:
- Fixed grammar in the Kconfig help text.
- Changed Kconfig to allow SCS to be selected with the patchable-
function-entry graph tracer.
- Changed the EFI stub patch to not filter out -ffixed-x18, only
SCS flags.

Changes in v8:
- Added __noscs to __hyp_text instead of filtering SCS flags from
the entire arch/arm64/kvm/hyp directory.
- Added a patch to filter out -ffixed-x18 and SCS flags from the
EFI stub.

Changes in v7:
- Changed irq_stack_entry/exit to store the shadow stack pointer
in x24 instead of x20 as kernel_entry uses x20-x23 to store
data that can be used later. Updated the comment as well.
- Changed the Makefile in arch/arm64/kvm/hyp to also filter out
-ffixed-x18.
- Changed SHADOW_CALL_STACK to depend on !FUNCTION_GRAPH_TRACER
instead of not selecting HAVE_FUNCTION_GRAPH_TRACER with SCS.
- Removed ifdefs from the EFI wrapper and updated the comment to
explain why we are restoring x18.
- Rebased as Ard's x18 patches that were part of this series have
already been merged.

Changes in v6:
- Updated comment in the EFI RT wrapper to include the
explanation from the commit message.
- Fixed the SHADOW_CALL_STACK_VMAP config option and the
compilation errors in scs_init_irq()
- Updated the comment in entry.S to Mark's suggestion
- Fixed the WARN_ON in scs_init() to trip only when the return
value for cpuhp_setup_state() is < 0.
- Removed ifdefs from the code in arch/arm64/kernel/scs.c and
added separate shadow stacks for the SDEI handler

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved scs_set_magic() before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Sami Tolvanen (12):
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
scs: disable when function graph tracing is enabled
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack
arm64: scs: add shadow stacks for SDEI
efi/libstub: disable SCS

Makefile | 6 ++
arch/Kconfig | 26 ++++++
arch/arm64/Kconfig | 5 ++
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/kvm_hyp.h | 2 +-
arch/arm64/include/asm/scs.h | 34 ++++++++
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 3 +
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++-
arch/arm64/kernel/entry.S | 47 +++++++++-
arch/arm64/kernel/head.S | 8 ++
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 21 +++++
arch/arm64/kernel/smp.c | 4 +
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/mm/proc.S | 14 +++
drivers/base/node.c | 6 ++
drivers/firmware/efi/libstub/Makefile | 3 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 92 ++++++++++++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 121 ++++++++++++++++++++++++++
mm/page_alloc.c | 6 ++
mm/vmstat.c | 3 +
32 files changed, 456 insertions(+), 7 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: ae83d0b416db002fe95601e7f97f64b59514d936
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:16:30

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 01/12] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the
ones documented for user space. The kernel must store addresses
of shadow stacks used by other tasks and interrupt handlers in
memory, which means an attacker capable of reading and writing
arbitrary memory may be able to locate them and hijack control
flow by modifying shadow stacks that are not currently in use.
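
To illustrate what the instrumentation looks like, with
-fsanitize=shadow-call-stack the compiler emits roughly the following
code in each instrumented non-leaf function (a sketch of typical
AArch64 code generation, not code from this patch; foo and bar are
hypothetical names):

	foo:					// sketch of an instrumented non-leaf function
		str	x30, [x18], #8		// push the return address to the shadow stack
		stp	x29, x30, [sp, #-16]!	// the normal frame record is still created
		mov	x29, sp
		bl	bar			// function body; calls keep using the regular stack
		ldp	x29, x30, [sp], #16
		ldr	x30, [x18, #-8]!	// reload the return address from the shadow stack
		ret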

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 +++
arch/Kconfig | 25 +++++++++
include/linux/compiler-clang.h | 6 +++
include/linux/compiler_types.h | 4 ++
include/linux/scs.h | 92 ++++++++++++++++++++++++++++++++++
init/init_task.c | 8 +++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++++
kernel/sched/core.c | 2 +
kernel/scs.c | 67 +++++++++++++++++++++++++
10 files changed, 220 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 49b2709ff44e..6094db2c7252 100644
--- a/Makefile
+++ b/Makefile
@@ -866,6 +866,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 786a85d4ad40..8450d56e6af6 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -533,6 +533,31 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found in
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the ones
+ documented for user space. The kernel must store addresses of shadow
+ stacks used by other tasks and interrupt handlers in memory, which
+ means an attacker capable of reading and writing arbitrary memory
+ may be able to locate them and hijack control flow by modifying
+ shadow stacks that are not currently in use.
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index e970f97a7fcb..97b62f47a80d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -193,6 +193,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..051d27ad3da4
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/poison.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/* An illegal pointer value to mark the end of the shadow stack. */
+#define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA)
+
+#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
+
+static inline void task_set_scs(struct task_struct *tsk, void *s)
+{
+ task_scs(tsk) = s;
+}
+
+extern void scs_init(void);
+
+static inline void *__scs_base(struct task_struct *tsk)
+{
+ /*
+ * To minimize the risk of exposure, architectures may clear a
+ * task's thread_info::shadow_call_stack while that task is
+ * running, and only save/restore the active shadow call stack
+ * pointer when the usual register may be clobbered (e.g. across
+ * context switches).
+ *
+ * The shadow call stack is aligned to SCS_SIZE, and grows
+ * upwards, so we can mask out the low bits to extract the base
+ * when the task is not running.
+ */
+ return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
+}
+
+static inline void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_set_scs(tsk, __scs_base(tsk));
+}
+
+extern int scs_prepare(struct task_struct *tsk, int node);
+
+static inline unsigned long *__scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = __scs_magic(__scs_base(tsk));
+
+ return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
+}
+
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void task_set_scs(struct task_struct *tsk, void *s) {}
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index bd403ed3e418..aaa71366d162 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -185,6 +186,13 @@ struct task_struct init_task
};
EXPORT_SYMBOL(init_task);

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
+ __aligned(SCS_SIZE) = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Initial thread structure. Alignment of this is handled by a special
* linker map entry.
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb4130ced32..c332eb9d4841 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index 8c700f881d92..f6339f9d232d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -840,6 +843,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3a61a3b8eaa9..c99620c1ec20 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6045,6 +6046,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..e1a8fc453b86
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/kasan.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <asm/scs.h>
+
+static struct kmem_cache *scs_cache;
+
+static void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ *__scs_magic(s) = SCS_END_MAGIC;
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
+ 0, NULL);
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_set_scs(tsk, s);
+ return 0;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = __scs_base(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ scs_free(s);
+}
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:16:49

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 05/12] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 85e4149cc5d5..409a6c1be8cc 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -81,6 +81,10 @@ endif

KBUILD_CFLAGS += $(branch-prot-flags-y)

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:16:49

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 06/12] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.
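
The context saved by cpu_do_suspend grows by one slot to make room for
x18 at byte offset 96 (slot 12, i.e. 12 * 8 bytes), which is why
NR_CTX_REGS changes from 12 to 13 below. A simplified sketch of the
corresponding layout from <asm/suspend.h> (not the full definition):

	struct cpu_suspend_ctx {
		u64	ctx_regs[NR_CTX_REGS];	/* now 13 slots; ctx_regs[12] holds x18 */
		u64	sp;
	} __aligned(16);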

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 197a9ba2d5ea..ed15be0f8103 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -58,6 +58,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
SYM_FUNC_START(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -82,6 +84,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
SYM_FUNC_END(cpu_do_suspend)

@@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:17:00

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 08/12] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:17:07

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 10/12] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.
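
At a high level, the change to cpu_switch_to() below switches the
shadow stack along with the regular stack. A C-style sketch of the
added assembly (x18 is written here as a pseudo-variable; prev and
next are the tasks passed in x0 and x1):

	/* sketch only -- the real code manipulates x18 directly in assembly */
	task_scs(prev) = x18;		/* str x18, [x0, #TSK_TI_SCS] */
	x18 = task_scs(next);		/* ldr x18, [x1, #TSK_TI_SCS] */
	task_scs(next) = NULL;		/* str xzr, [x1, #TSK_TI_SCS]; limit exposure */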

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 ++++
arch/arm64/include/asm/scs.h | 34 ++++++++++++++++++++++++++++
arch/arm64/include/asm/thread_info.h | 3 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 3 +++
arch/arm64/kernel/entry.S | 33 +++++++++++++++++++++++++--
arch/arm64/kernel/head.S | 8 +++++++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 16 +++++++++++++
arch/arm64/kernel/smp.c | 4 ++++
10 files changed, 107 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 40fb05d96c60..c380a16533f6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -64,6 +64,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -1025,6 +1026,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..27dced9cbaf7
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+static __always_inline void scs_save(struct task_struct *tsk)
+{
+ void *s;
+
+ asm volatile("mov %0, x18" : "=r" (s));
+ task_set_scs(tsk, s);
+}
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_save(struct task_struct *tsk) {}
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 512174a8e789..1fb651f73da3 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,9 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *shadow_call_stack;
+#endif
};

#define thread_saved_pc(tsk) \
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e5b8ee31442..151f28521f1e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9981a0a5a87f..777a662888ec 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,9 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS, offsetof(struct task_struct, thread_info.shadow_call_stack));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ddcde093c433..14f0ff763b39 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -179,6 +179,11 @@ alternative_cb_end
apply_ssbd 1, x22, x23

ptrauth_keys_install_kernel tsk, 1, x20, x22, x23
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [tsk, #TSK_TI_SCS] // Restore shadow call stack
+ str xzr, [tsk, #TSK_TI_SCS] // Limit visibility of saved SCS
+#endif
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -280,6 +285,12 @@ alternative_else_nop_endif
ct_user_enter
.endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ .if \el == 0
+ str x18, [tsk, #TSK_TI_SCS] // Save shadow call stack
+ .endif
+#endif
+
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
/*
* Restore access to TTBR0_EL1. If returning to EL0, no need for SPSR
@@ -388,6 +399,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -405,15 +419,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ adr_this_cpu x18, irq_shadow_call_stack, x26
+#endif
+
9998:
.endm

/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x18, x24
+#endif
.endm

/* GPRs used by entry code */
@@ -901,6 +925,11 @@ SYM_FUNC_START(cpu_switch_to)
mov sp, x9
msr sp_el0, x1
ptrauth_keys_install_kernel x1, 1, x8, x9, x10
+#ifdef CONFIG_SHADOW_CALL_STACK
+ str x18, [x0, #TSK_TI_SCS]
+ ldr x18, [x1, #TSK_TI_SCS]
+ str xzr, [x1, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 57a91032b4c2..1514445bbccb 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -424,6 +424,10 @@ SYM_FUNC_START_LOCAL(__primary_switched)
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -737,6 +741,10 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+#ifdef CONFIG_SHADOW_CALL_STACK
+ ldr x18, [x2, #TSK_TI_SCS] // set shadow call stack
+ str xzr, [x2, #TSK_TI_SCS] // limit visibility of saved SCS
+#endif
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 56be4cbf771f..a35d3318492c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -515,6 +516,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
entry_task_switch(next);
uao_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..086ad97bba86
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <asm/scs.h>
+
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+ __aligned(SCS_SIZE)
+
+DEFINE_SCS(irq_shadow_call_stack);
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 061f60fe452f..1d112e34a636 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -46,6 +46,7 @@
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/processor.h>
+#include <asm/scs.h>
#include <asm/smp_plat.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -370,6 +371,9 @@ void cpu_die(void)
unsigned int cpu = smp_processor_id();
const struct cpu_operations *ops = get_cpu_ops(cpu);

+ /* Save the shadow stack pointer before exiting the idle task */
+ scs_save(current);
+
idle_task_exit();

local_daif_mask();
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:17:31

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 11/12] arm64: scs: add shadow stacks for SDEI

This change adds per-CPU shadow call stacks for the SDEI handler.
Similarly to how the kernel stacks are handled, we add separate shadow
stacks for normal and critical events.
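
For reference, each DEFINE_SCS() invocation added in
arch/arm64/kernel/scs.c expands, per the macro earlier in that file,
to roughly the following per-CPU definition:

	/* e.g. DEFINE_SCS(sdei_shadow_call_stack_normal); becomes approximately: */
	DEFINE_PER_CPU(unsigned long [SCS_SIZE / sizeof(long)],
		       sdei_shadow_call_stack_normal) __aligned(SCS_SIZE);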

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: James Morse <[email protected]>
Tested-by: James Morse <[email protected]>
---
arch/arm64/kernel/entry.S | 14 +++++++++++++-
arch/arm64/kernel/scs.c | 5 +++++
2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 14f0ff763b39..9f7be489d26d 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1058,13 +1058,16 @@ SYM_CODE_START(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1074,6 +1077,15 @@ SYM_CODE_START(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ adr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal, tmp=x6
+ b 4f
+3: adr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index 086ad97bba86..656262736eca 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -14,3 +14,8 @@
__aligned(SCS_SIZE)

DEFINE_SCS(irq_shadow_call_stack);
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
+#endif
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:17:35

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 12/12] efi/libstub: disable SCS

Shadow stacks are not available in the EFI stub, so filter out the SCS flags.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Ard Biesheuvel <[email protected]>
---
drivers/firmware/efi/libstub/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 094eabdecfe6..b52ae8c29560 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
$(call cc-option,-fno-stack-protector) \
-D__DISABLE_EXPORTS

+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:17:41

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 07/12] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18, restore the register before jumping back
to potentially SCS instrumented code. This is safe, because the wrapper
is called with preemption disabled and a separate shadow stack is used
for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..6ca6c0dc11a1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:18:13

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 04/12] scs: disable when function graph tracing is enabled

With SCS, the return address is taken from the shadow stack and the
value in the frame record has no effect. The mcount-based graph tracer
hooks function returns by modifying frame records on the (regular)
stack, and is therefore not compatible. The patchable-function-entry
graph tracer used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before
it is saved to the shadow stack, and is compatible.

Modifying the mcount based graph tracer to work with SCS would require
a mechanism to determine the corresponding slot on the shadow stack
(and to pass this through the ftrace infrastructure), and we expect
that everyone will eventually move to the patchable-function-entry
based graph tracer anyway, so for now let's disable SCS when the
mcount-based graph tracer is enabled.

SCS and patchable-function-entry are both supported from LLVM 10.x.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 8450d56e6af6..b52929f38cf7 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -543,6 +543,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
config SHADOW_CALL_STACK
bool "Clang Shadow Call Stack"
depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+ depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
help
This option enables Clang's Shadow Call Stack, which uses a
shadow stack to protect function return addresses from being
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:18:16

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 03/12] scs: add support for stack usage debugging

This change implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When
enabled, it also prints out the highest shadow stack usage per process.
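
With the format string used below, a new high-water mark is reported
with a message along these lines (values purely illustrative):

	kworker/u16:3 (154): highest shadow stack usage: 336 bytes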

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
kernel/scs.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 7eea2d97bd2d..147917e31adf 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -68,6 +68,43 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static unsigned long __scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = __scs_base(tsk);
+ unsigned long *end = __scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static unsigned long highest;
+ unsigned long used = __scs_used(tsk);
+ unsigned long prev;
+ unsigned long curr = highest;
+
+ while (used > curr) {
+ prev = cmpxchg(&highest, curr, used);
+
+ if (prev == curr) {
+ pr_info("%s (%d): highest shadow stack usage: "
+ "%lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ break;
+ }
+
+ curr = prev;
+ }
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk) {}
+#endif
+
void scs_release(struct task_struct *tsk)
{
void *s;
@@ -77,6 +114,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
scs_free(s);
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:18:30

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 09/12] arm64: disable SCS for hypervisor code

Disable SCS for code that runs at a different exception level by
adding __noscs to __hyp_text.
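
After this change, any function already annotated with __hyp_text is
compiled without SCS instrumentation, e.g. (hypothetical function name,
shown only to illustrate how the attribute is applied):

	static void __hyp_text __my_hyp_helper(void)
	{
		/* runs at EL2, built without -fsanitize=shadow-call-stack */
	}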

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_hyp.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index fe57f60f06a8..875b106c5d98 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -13,7 +13,7 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>

-#define __hyp_text __section(.hyp.text) notrace
+#define __hyp_text __section(.hyp.text) notrace __noscs

#define read_sysreg_elx(r,nvh,vh) \
({ \
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-21 02:19:26

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v12 02/12] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.
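
With this change the new counter appears alongside KernelStack in
/proc/meminfo, the per-node meminfo, show_free_areas(), and vmstat.
For example, the /proc/meminfo line looks like this (value purely
illustrative):

	ShadowCallStack:     104 kB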

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 16 ++++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 38 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 10d7e818e118..50b8c0d43859 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..09cd51c8d23d 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_KB));
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b9de7d220fb..acffc3bc6178 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -156,6 +156,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_KB, /* measured in KiB */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index e1a8fc453b86..7eea2d97bd2d 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -6,8 +6,10 @@
*/

#include <linux/kasan.h>
+#include <linux/mm.h>
#include <linux/scs.h>
#include <linux/slab.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static struct kmem_cache *scs_cache;
@@ -41,6 +43,17 @@ void __init scs_init(void)
0, NULL);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(__scs_base(tsk));
+}
+
+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_KB,
+ account * (SCS_SIZE / 1024));
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -50,6 +63,8 @@ int scs_prepare(struct task_struct *tsk, int node)
return -ENOMEM;

task_set_scs(tsk, s);
+ scs_account(tsk, 1);
+
return 0;
}

@@ -63,5 +78,6 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 69827d4fa052..83743d7a6177 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5411,6 +5411,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5433,6 +5436,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_KB),
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 96d21a792b57..2435d2c24657 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.26.1.301.g55bc3eb7cb9-goog

2020-04-22 17:41:33

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, Apr 20, 2020 at 02:18:30PM -0700, Sami Tolvanen wrote:
> On Mon, Apr 20, 2020 at 06:17:28PM +0100, Will Deacon wrote:
> > > + * The shadow call stack is aligned to SCS_SIZE, and grows
> > > + * upwards, so we can mask out the low bits to extract the base
> > > + * when the task is not running.
> > > + */
> > > + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
> >
> > Could we avoid forcing this alignment if we stored the SCS pointer as a
> > (base,offset) pair instead? That might be friendlier on the allocations
> > later on.
>
> The idea is to avoid storing the current task's shadow stack address in
> memory, which is why I would rather not store the base address either.

What I mean is that, instead of storing the current shadow stack pointer,
we instead store a base and an offset. We can still clear the base, as you
do with the pointer today, and I don't see that the offset is useful to
an attacker on its own.
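
To make the suggestion concrete, a minimal sketch of the (base, offset)
idea with hypothetical field names (not code from any patch in this
series):

	struct thread_info {
		/* existing fields omitted */
		void		*scs_base;	/* could still be cleared while the task runs */
		unsigned long	scs_offset;	/* on its own, not useful to an attacker */
	};

	/* the current shadow stack pointer would then be scs_base + scs_offset */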

But more generally, is it really worthwhile to do this clearing at all? Can
you (or Kees?) provide some justification for it, please? We don't do it
for anything else, e.g. the pointer authentication keys, so something
feels amiss here.

Thanks,

Will

2020-04-22 17:47:09

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v12 02/12] scs: add accounting

On Mon, Apr 20, 2020 at 07:14:43PM -0700, Sami Tolvanen wrote:
> This change adds accounting for the memory allocated for shadow stacks.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> drivers/base/node.c | 6 ++++++
> fs/proc/meminfo.c | 4 ++++
> include/linux/mmzone.h | 3 +++
> kernel/scs.c | 16 ++++++++++++++++
> mm/page_alloc.c | 6 ++++++
> mm/vmstat.c | 3 +++
> 6 files changed, 38 insertions(+)

Acked-by: Will Deacon <[email protected]>

Thanks!

Will

2020-04-22 17:49:32

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v12 03/12] scs: add support for stack usage debugging

On Mon, Apr 20, 2020 at 07:14:44PM -0700, Sami Tolvanen wrote:
> Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
> also prints out the highest shadow stack usage per process.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
> kernel/scs.c | 38 ++++++++++++++++++++++++++++++++++++++
> 1 file changed, 38 insertions(+)
>
> diff --git a/kernel/scs.c b/kernel/scs.c
> index 7eea2d97bd2d..147917e31adf 100644
> --- a/kernel/scs.c
> +++ b/kernel/scs.c
> @@ -68,6 +68,43 @@ int scs_prepare(struct task_struct *tsk, int node)
> return 0;
> }
>
> +#ifdef CONFIG_DEBUG_STACK_USAGE
> +static unsigned long __scs_used(struct task_struct *tsk)
> +{
> + unsigned long *p = __scs_base(tsk);
> + unsigned long *end = __scs_magic(p);
> + unsigned long s = (unsigned long)p;
> +
> + while (p < end && READ_ONCE_NOCHECK(*p))
> + p++;
> +
> + return (unsigned long)p - s;
> +}
> +
> +static void scs_check_usage(struct task_struct *tsk)
> +{
> + static unsigned long highest;
> + unsigned long used = __scs_used(tsk);
> + unsigned long prev;
> + unsigned long curr = highest;
> +
> + while (used > curr) {
> + prev = cmpxchg(&highest, curr, used);

I think this can be cmpxchg_relaxed(), since we don't care about ordering
here afaict.

With that:

Acked-by: Will Deacon <[email protected]>

Cheers,

Will

2020-04-22 17:53:11

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Wed, Apr 22, 2020 at 06:39:47PM +0100, Will Deacon wrote:
> On Mon, Apr 20, 2020 at 02:18:30PM -0700, Sami Tolvanen wrote:
> > On Mon, Apr 20, 2020 at 06:17:28PM +0100, Will Deacon wrote:
> > > > + * The shadow call stack is aligned to SCS_SIZE, and grows
> > > > + * upwards, so we can mask out the low bits to extract the base
> > > > + * when the task is not running.
> > > > + */
> > > > + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
> > >
> > > Could we avoid forcing this alignment if we stored the SCS pointer as a
> > > (base,offset) pair instead? That might be friendlier on the allocations
> > > later on.
> >
> > The idea is to avoid storing the current task's shadow stack address in
> > memory, which is why I would rather not store the base address either.
>
> What I mean is that, instead of storing the current shadow stack pointer,
> we instead store a base and an offset. We can still clear the base, as you
> do with the pointer today, and I don't see that the offset is useful to
> an attacker on its own.
>
> But more generally, is it really worthwhile to do this clearing at all? Can
> you (or Kees?) provide some justification for it, please? We don't do it
> for anything else, e.g. the pointer authentication keys, so something
> feels amiss here.

It's a hardening step to just reduce the lifetime of a valid address
exposed in memory. In fact, since there is a cache, I think it should be
wiped even in scs_release().

--
Kees Cook

2020-04-22 18:03:11

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v12 01/12] add support for Clang's Shadow Call Stack (SCS)

On Wed, Apr 22, 2020 at 10:54:45AM -0700, Kees Cook wrote:
> On Mon, Apr 20, 2020 at 07:14:42PM -0700, Sami Tolvanen wrote:
> > +void scs_release(struct task_struct *tsk)
> > +{
> > + void *s;
> > +
> > + s = __scs_base(tsk);
> > + if (!s)
> > + return;
> > +
> > + WARN_ON(scs_corrupted(tsk));
> > +
>
> I'd like to have task_set_scs(tsk, NULL) retained here, to avoid the need to
> depend on the released task memory getting scrubbed at a later time.

Hmm, doesn't it get zeroed almost immediately by kmem_cache_free() if
INIT_ON_FREE_DEFAULT_ON is set? That seems much better than special-casing
SCS, as there's a tonne of other useful stuff kicking around in the
task_struct and treating this specially feels odd to me.

Will

2020-04-22 18:04:11

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Wed, Apr 22, 2020 at 10:51:02AM -0700, Kees Cook wrote:
> On Wed, Apr 22, 2020 at 06:39:47PM +0100, Will Deacon wrote:
> > On Mon, Apr 20, 2020 at 02:18:30PM -0700, Sami Tolvanen wrote:
> > > On Mon, Apr 20, 2020 at 06:17:28PM +0100, Will Deacon wrote:
> > > > > + * The shadow call stack is aligned to SCS_SIZE, and grows
> > > > > + * upwards, so we can mask out the low bits to extract the base
> > > > > + * when the task is not running.
> > > > > + */
> > > > > + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
> > > >
> > > > Could we avoid forcing this alignment if we stored the SCS pointer as a
> > > > (base,offset) pair instead? That might be friendlier on the allocations
> > > > later on.
> > >
> > > The idea is to avoid storing the current task's shadow stack address in
> > > memory, which is why I would rather not store the base address either.
> >
> > What I mean is that, instead of storing the current shadow stack pointer,
> > we instead store a base and an offset. We can still clear the base, as you
> > do with the pointer today, and I don't see that the offset is useful to
> > an attacker on its own.
> >
> > But more generally, is it really worthwhile to do this clearing at all? Can
> > you (or Kees?) provide some justification for it, please? We don't do it
> > for anything else, e.g. the pointer authentication keys, so something
> > feels amiss here.
>
> It's a hardening step to just reduce the lifetime of a valid address
> exposed in memory. In fact, since there is a cache, I think it should be
> wiped even in scs_release().

But we don't do this for /anything/ else and it forces alignment
restrictions on the SCS allocation. Please either do it consistently, or
not at all.

Will

2020-04-22 19:05:44

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v12 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, Apr 20, 2020 at 07:14:42PM -0700, Sami Tolvanen wrote:
> This change adds generic support for Clang's Shadow Call Stack,
> which uses a shadow stack to protect return addresses from being
> overwritten by an attacker. Details are available here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> Note that security guarantees in the kernel differ from the
> ones documented for user space. The kernel must store addresses
> of shadow stacks used by other tasks and interrupt handlers in
> > memory, which means an attacker capable of reading and writing
> arbitrary memory may be able to locate them and hijack control
> flow by modifying shadow stacks that are not currently in use.
>
> Signed-off-by: Sami Tolvanen <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> Reviewed-by: Miguel Ojeda <[email protected]>
> ---
> Makefile | 6 +++
> arch/Kconfig | 25 +++++++++
> include/linux/compiler-clang.h | 6 +++
> include/linux/compiler_types.h | 4 ++
> include/linux/scs.h | 92 ++++++++++++++++++++++++++++++++++
> init/init_task.c | 8 +++
> kernel/Makefile | 1 +
> kernel/fork.c | 9 ++++
> kernel/sched/core.c | 2 +
> kernel/scs.c | 67 +++++++++++++++++++++++++
> 10 files changed, 220 insertions(+)
> create mode 100644 include/linux/scs.h
> create mode 100644 kernel/scs.c
>
> diff --git a/Makefile b/Makefile
> index 49b2709ff44e..6094db2c7252 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -866,6 +866,12 @@ ifdef CONFIG_LIVEPATCH
> KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
> endif
>
> +ifdef CONFIG_SHADOW_CALL_STACK
> +CC_FLAGS_SCS := -fsanitize=shadow-call-stack
> +KBUILD_CFLAGS += $(CC_FLAGS_SCS)
> +export CC_FLAGS_SCS
> +endif
> +
> # arch Makefile may override CC so keep this after arch Makefile is included
> NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)
>
> diff --git a/arch/Kconfig b/arch/Kconfig
> index 786a85d4ad40..8450d56e6af6 100644
> --- a/arch/Kconfig
> +++ b/arch/Kconfig
> @@ -533,6 +533,31 @@ config STACKPROTECTOR_STRONG
> about 20% of all kernel functions, which increases the kernel code
> size by about 2%.
>
> +config ARCH_SUPPORTS_SHADOW_CALL_STACK
> + bool
> + help
> + An architecture should select this if it supports Clang's Shadow
> + Call Stack, has asm/scs.h, and implements runtime support for shadow
> + stack switching.
> +
> +config SHADOW_CALL_STACK
> + bool "Clang Shadow Call Stack"
> + depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
> + help
> + This option enables Clang's Shadow Call Stack, which uses a
> + shadow stack to protect function return addresses from being
> + overwritten by an attacker. More information can be found in
> + Clang's documentation:
> +
> + https://clang.llvm.org/docs/ShadowCallStack.html
> +
> + Note that security guarantees in the kernel differ from the ones
> + documented for user space. The kernel must store addresses of shadow
> + stacks used by other tasks and interrupt handlers in memory, which
> + means an attacker capable of reading and writing arbitrary memory
> + may be able to locate them and hijack control flow by modifying
> + shadow stacks that are not currently in use.
> +
> config HAVE_ARCH_WITHIN_STACK_FRAMES
> bool
> help
> diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
> index 333a6695a918..18fc4d29ef27 100644
> --- a/include/linux/compiler-clang.h
> +++ b/include/linux/compiler-clang.h
> @@ -42,3 +42,9 @@
> * compilers, like ICC.
> */
> #define barrier() __asm__ __volatile__("" : : : "memory")
> +
> +#if __has_feature(shadow_call_stack)
> +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
> +#else
> +# define __noscs
> +#endif
> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index e970f97a7fcb..97b62f47a80d 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -193,6 +193,10 @@ struct ftrace_likely_data {
> # define randomized_struct_fields_end
> #endif
>
> +#ifndef __noscs
> +# define __noscs
> +#endif
> +
> #ifndef asm_volatile_goto
> #define asm_volatile_goto(x...) asm goto(x)
> #endif
> diff --git a/include/linux/scs.h b/include/linux/scs.h
> new file mode 100644
> index 000000000000..051d27ad3da4
> --- /dev/null
> +++ b/include/linux/scs.h
> @@ -0,0 +1,92 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#ifndef _LINUX_SCS_H
> +#define _LINUX_SCS_H
> +
> +#include <linux/gfp.h>
> +#include <linux/poison.h>
> +#include <linux/sched.h>
> +#include <asm/page.h>
> +
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +
> +/*
> + * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
> + * architecture) provided ~40% safety margin on stack usage while keeping
> + * memory allocation overhead reasonable.
> + */
> +#define SCS_SIZE 1024UL
> +#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
> +
> +/* An illegal pointer value to mark the end of the shadow stack. */
> +#define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA)
> +
> +#define task_scs(tsk) (task_thread_info(tsk)->shadow_call_stack)
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s)
> +{
> + task_scs(tsk) = s;
> +}
> +
> +extern void scs_init(void);
> +
> +static inline void *__scs_base(struct task_struct *tsk)
> +{
> + /*
> + * To minimize the risk of exposure, architectures may clear a
> + * task's thread_info::shadow_call_stack while that task is
> + * running, and only save/restore the active shadow call stack
> + * pointer when the usual register may be clobbered (e.g. across
> + * context switches).
> + *
> + * The shadow call stack is aligned to SCS_SIZE, and grows
> + * upwards, so we can mask out the low bits to extract the base
> + * when the task is not running.
> + */
> + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
> +}
> +
> +static inline void scs_task_reset(struct task_struct *tsk)
> +{
> + /*
> + * Reset the shadow stack to the base address in case the task
> + * is reused.
> + */
> + task_set_scs(tsk, __scs_base(tsk));
> +}
> +
> +extern int scs_prepare(struct task_struct *tsk, int node);
> +
> +static inline unsigned long *__scs_magic(void *s)
> +{
> + return (unsigned long *)(s + SCS_SIZE) - 1;
> +}
> +
> +static inline bool scs_corrupted(struct task_struct *tsk)
> +{
> + unsigned long *magic = __scs_magic(__scs_base(tsk));
> +
> + return READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC;
> +}
> +
> +extern void scs_release(struct task_struct *tsk);
> +
> +#else /* CONFIG_SHADOW_CALL_STACK */
> +
> +#define task_scs(tsk) NULL
> +
> +static inline void task_set_scs(struct task_struct *tsk, void *s) {}
> +static inline void scs_init(void) {}
> +static inline void scs_task_reset(struct task_struct *tsk) {}
> +static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
> +static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
> +static inline void scs_release(struct task_struct *tsk) {}
> +
> +#endif /* CONFIG_SHADOW_CALL_STACK */
> +
> +#endif /* _LINUX_SCS_H */
> diff --git a/init/init_task.c b/init/init_task.c
> index bd403ed3e418..aaa71366d162 100644
> --- a/init/init_task.c
> +++ b/init/init_task.c
> @@ -11,6 +11,7 @@
> #include <linux/mm.h>
> #include <linux/audit.h>
> #include <linux/numa.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <linux/uaccess.h>
> @@ -185,6 +186,13 @@ struct task_struct init_task
> };
> EXPORT_SYMBOL(init_task);
>
> +#ifdef CONFIG_SHADOW_CALL_STACK
> +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] __init_task_data
> + __aligned(SCS_SIZE) = {
> + [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
> +};
> +#endif
> +
> /*
> * Initial thread structure. Alignment of this is handled by a special
> * linker map entry.
> diff --git a/kernel/Makefile b/kernel/Makefile
> index 4cb4130ced32..c332eb9d4841 100644
> --- a/kernel/Makefile
> +++ b/kernel/Makefile
> @@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
> obj-$(CONFIG_IRQ_WORK) += irq_work.o
> obj-$(CONFIG_CPU_PM) += cpu_pm.o
> obj-$(CONFIG_BPF) += bpf/
> +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
>
> obj-$(CONFIG_PERF_EVENTS) += events/
>
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 8c700f881d92..f6339f9d232d 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -94,6 +94,7 @@
> #include <linux/thread_info.h>
> #include <linux/stackleak.h>
> #include <linux/kasan.h>
> +#include <linux/scs.h>
>
> #include <asm/pgtable.h>
> #include <asm/pgalloc.h>
> @@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk)
>
> void free_task(struct task_struct *tsk)
> {
> + scs_release(tsk);
> +
> #ifndef CONFIG_THREAD_INFO_IN_TASK
> /*
> * The task is finally done with both the stack and thread_info,
> @@ -840,6 +843,8 @@ void __init fork_init(void)
> NULL, free_vm_stack_cache);
> #endif
>
> + scs_init();
> +
> lockdep_init_task(&init_task);
> uprobes_init();
> }
> @@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
> if (err)
> goto free_stack;
>
> + err = scs_prepare(tsk, node);
> + if (err)
> + goto free_stack;
> +
> #ifdef CONFIG_SECCOMP
> /*
> * We must handle setting up seccomp filters once we're under
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 3a61a3b8eaa9..c99620c1ec20 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -11,6 +11,7 @@
> #include <linux/nospec.h>
>
> #include <linux/kcov.h>
> +#include <linux/scs.h>
>
> #include <asm/switch_to.h>
> #include <asm/tlb.h>
> @@ -6045,6 +6046,7 @@ void init_idle(struct task_struct *idle, int cpu)
> idle->se.exec_start = sched_clock();
> idle->flags |= PF_IDLE;
>
> + scs_task_reset(idle);
> kasan_unpoison_task_stack(idle);
>
> #ifdef CONFIG_SMP
> diff --git a/kernel/scs.c b/kernel/scs.c
> new file mode 100644
> index 000000000000..e1a8fc453b86
> --- /dev/null
> +++ b/kernel/scs.c
> @@ -0,0 +1,67 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Shadow Call Stack support.
> + *
> + * Copyright (C) 2019 Google LLC
> + */
> +
> +#include <linux/kasan.h>
> +#include <linux/scs.h>
> +#include <linux/slab.h>
> +#include <asm/scs.h>
> +
> +static struct kmem_cache *scs_cache;
> +
> +static void *scs_alloc(int node)
> +{
> + void *s;
> +
> + s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
> + if (s) {
> + *__scs_magic(s) = SCS_END_MAGIC;
> + /*
> + * Poison the allocation to catch unintentional accesses to
> + * the shadow stack when KASAN is enabled.
> + */
> + kasan_poison_object_data(scs_cache, s);
> + }
> +
> + return s;
> +}
> +
> +static void scs_free(void *s)
> +{
> + kasan_unpoison_object_data(scs_cache, s);
> + kmem_cache_free(scs_cache, s);
> +}
> +
> +void __init scs_init(void)
> +{
> + scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, SCS_SIZE,
> + 0, NULL);
> +}
> +
> +int scs_prepare(struct task_struct *tsk, int node)
> +{
> + void *s;
> +
> + s = scs_alloc(node);
> + if (!s)
> + return -ENOMEM;
> +
> + task_set_scs(tsk, s);
> + return 0;
> +}
> +
> +void scs_release(struct task_struct *tsk)
> +{
> + void *s;
> +
> + s = __scs_base(tsk);
> + if (!s)
> + return;
> +
> + WARN_ON(scs_corrupted(tsk));
> +

I'd like to have task_set_scs(tsk, NULL) retained here, to avoid the need to
depend on the released task memory getting scrubbed at a later time.
Exposures of this pointer would expose the region of other SCSs.

-Kees

> + scs_free(s);
> +}
> --
> 2.26.1.301.g55bc3eb7cb9-goog
>

--
Kees Cook

2020-04-22 23:53:56

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Wed, Apr 22, 2020 at 06:39:47PM +0100, Will Deacon wrote:
> On Mon, Apr 20, 2020 at 02:18:30PM -0700, Sami Tolvanen wrote:
> > On Mon, Apr 20, 2020 at 06:17:28PM +0100, Will Deacon wrote:
> > > > + * The shadow call stack is aligned to SCS_SIZE, and grows
> > > > + * upwards, so we can mask out the low bits to extract the base
> > > > + * when the task is not running.
> > > > + */
> > > > + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
> > >
> > > Could we avoid forcing this alignment if we stored the SCS pointer as a
> > > (base,offset) pair instead? That might be friendlier on the allocations
> > > later on.
> >
> > The idea is to avoid storing the current task's shadow stack address in
> > memory, which is why I would rather not store the base address either.
>
> What I mean is that, instead of storing the current shadow stack pointer,
> we instead store a base and an offset. We can still clear the base, as you
> do with the pointer today, and I don't see that the offset is useful to
> an attacker on its own.

I see what you mean. However, even if we store the base address +
the offset, we still need aligned allocation if we want to clear
the address. This would basically just move __scs_base() logic to
cpu_switch_to() / scs_save().

> But more generally, is it really worthwhile to do this clearing at all? Can
> you (or Kees?) provide some justification for it, please? We don't do it
> for anything else, e.g. the pointer authentication keys, so something
> feels amiss here.

Like Kees pointed out, this makes it slightly harder to locate the
current task's shadow stack pointer. I realize there are other useful
targets in thread_info, but we would rather not make this any easier
than necessary. Is your primary concern here the cost of doing this,
or just that it doesn't sufficiently improve security?

Sami

2020-04-22 23:55:56

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v12 03/12] scs: add support for stack usage debugging

On Wed, Apr 22, 2020 at 06:46:02PM +0100, Will Deacon wrote:
> > +static void scs_check_usage(struct task_struct *tsk)
> > +{
> > + static unsigned long highest;
> > + unsigned long used = __scs_used(tsk);
> > + unsigned long prev;
> > + unsigned long curr = highest;
> > +
> > + while (used > curr) {
> > + prev = cmpxchg(&highest, curr, used);
>
> I think this can be cmpxchg_relaxed(), since we don't care about ordering
> here afaict.

Sure, I'll change this in v13. Thanks.

Sami

2020-04-23 18:12:23

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v12 01/12] add support for Clang's Shadow Call Stack (SCS)

On Wed, Apr 22, 2020 at 07:00:40PM +0100, Will Deacon wrote:
> On Wed, Apr 22, 2020 at 10:54:45AM -0700, Kees Cook wrote:
> > On Mon, Apr 20, 2020 at 07:14:42PM -0700, Sami Tolvanen wrote:
> > > +void scs_release(struct task_struct *tsk)
> > > +{
> > > + void *s;
> > > +
> > > + s = __scs_base(tsk);
> > > + if (!s)
> > > + return;
> > > +
> > > + WARN_ON(scs_corrupted(tsk));
> > > +
> >
> > I'd like to have task_set_scs(tsk, NULL) retained here, to avoid the need to
> > depend on the released task memory getting scrubbed at a later time.
>
> Hmm, doesn't it get zeroed almost immediately by kmem_cache_free() if
> INIT_ON_FREE_DEFAULT_ON is set? That seems much better than special-casing
> SCS, as there's a tonne of other useful stuff kicking around in the
> task_struct and treating this specially feels odd to me.

That's going to be an uncommon config except for the most paranoid of
system builders. :) Having this particular thing get wiped is just
a decent best practice for what is otherwise treated as a "secret", just
like crypto routines wipe their secrets before free().
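
For reference, a minimal sketch of that suggestion against the v12 helpers
quoted above (illustrative only, not the code that was actually applied):

void scs_release(struct task_struct *tsk)
{
	void *s = __scs_base(tsk);

	if (!s)
		return;

	WARN_ON(scs_corrupted(tsk));

	/* wipe the stored pointer before the memory is freed */
	task_set_scs(tsk, NULL);
	scs_free(s);
}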

--
Kees Cook

2020-04-23 18:32:49

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Wed, Apr 22, 2020 at 04:51:34PM -0700, Sami Tolvanen wrote:
> On Wed, Apr 22, 2020 at 06:39:47PM +0100, Will Deacon wrote:
> > On Mon, Apr 20, 2020 at 02:18:30PM -0700, Sami Tolvanen wrote:
> > > On Mon, Apr 20, 2020 at 06:17:28PM +0100, Will Deacon wrote:
> > > > > + * The shadow call stack is aligned to SCS_SIZE, and grows
> > > > > + * upwards, so we can mask out the low bits to extract the base
> > > > > + * when the task is not running.
> > > > > + */
> > > > > + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
> > > >
> > > > Could we avoid forcing this alignment if we stored the SCS pointer as a
> > > > (base,offset) pair instead? That might be friendlier on the allocations
> > > > later on.
> > >
> > > The idea is to avoid storing the current task's shadow stack address in
> > > memory, which is why I would rather not store the base address either.
> >
> > What I mean is that, instead of storing the current shadow stack pointer,
> > we instead store a base and an offset. We can still clear the base, as you
> > do with the pointer today, and I don't see that the offset is useful to
> > an attacker on its own.
>
> I see what you mean. However, even if we store the base address +
> the offset, we still need aligned allocation if we want to clear
> the address. This would basically just move __scs_base() logic to
> cpu_switch_to() / scs_save().

Okay, so, I feel like this has gotten off into the weeds, or I'm really
dense (or both). :) Going back to the original comment:

> > > > Could we avoid forcing this alignment if we stored the SCS
> > > > pointer as a (base,offset) pair instead? That might be friendlier
> > > > on the allocations later on.

I think there was some confusion about mixing the "we want to be able to
wipe the value" combined with the masking in __scs_base(). These are
unrelated, as was correctly observed with "We can still clear the base".

What I don't understand here is the suggestion to store two values:

Why is two better than storing one? With one, we only need a single access.

Why would storing the base be "friendlier on the allocations later on"?
This is coming out of a single kmem cache, in 1K chunks. They will be
naturally aligned to 1K (unless redzoing has been turned on for some
slab debugging reason). The base masking is a way to avoid needing to
store two values, and only happens at task death.

Storing two values eats memory for all tasks for seemingly no meaningful
common benefit. What am I missing here?

--
Kees Cook

2020-04-24 10:16:42

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v12 01/12] add support for Clang's Shadow Call Stack (SCS)

On Thu, Apr 23, 2020 at 11:09:24AM -0700, Kees Cook wrote:
> On Wed, Apr 22, 2020 at 07:00:40PM +0100, Will Deacon wrote:
> > On Wed, Apr 22, 2020 at 10:54:45AM -0700, Kees Cook wrote:
> > > On Mon, Apr 20, 2020 at 07:14:42PM -0700, Sami Tolvanen wrote:
> > > > +void scs_release(struct task_struct *tsk)
> > > > +{
> > > > + void *s;
> > > > +
> > > > + s = __scs_base(tsk);
> > > > + if (!s)
> > > > + return;
> > > > +
> > > > + WARN_ON(scs_corrupted(tsk));
> > > > +
> > >
> > > I'd like to have task_set_scs(tsk, NULL) retained here, to avoid the need to
> > > depend on the released task memory getting scrubbed at a later time.
> >
> > Hmm, doesn't it get zeroed almost immediately by kmem_cache_free() if
> > INIT_ON_FREE_DEFAULT_ON is set? That seems much better than special-casing
> > SCS, as there's a tonne of other useful stuff kicking around in the
> > task_struct and treating this specially feels odd to me.
>
> That's going to be an uncommon config except for the most paranoid of
> system builders. :)

Sounds like a perfect fit, then ;)

> Having this particular thing get wiped is just
> a decent best practice for what is otherwise treated as a "secret", just
> like crypto routines wipe their secrets before free().

Sorry, but I don't buy that analogy. The SCS pointer is stored in memory
all over the place and if it needs to be treated in the same way as crypto
secrets then this whole thing needs rethinking. On top of that, where
crypto routines may wipe their secrets, we don't do what is being proposed
for the SCS pointer to other similar pieces of data, such as pointer
authentication keys.

Will

2020-04-24 11:23:17

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Thu, Apr 23, 2020 at 11:28:40AM -0700, Kees Cook wrote:
> On Wed, Apr 22, 2020 at 04:51:34PM -0700, Sami Tolvanen wrote:
> > On Wed, Apr 22, 2020 at 06:39:47PM +0100, Will Deacon wrote:
> > > On Mon, Apr 20, 2020 at 02:18:30PM -0700, Sami Tolvanen wrote:
> > > > On Mon, Apr 20, 2020 at 06:17:28PM +0100, Will Deacon wrote:
> > > > > > + * The shadow call stack is aligned to SCS_SIZE, and grows
> > > > > > + * upwards, so we can mask out the low bits to extract the base
> > > > > > + * when the task is not running.
> > > > > > + */
> > > > > > + return (void *)((unsigned long)task_scs(tsk) & ~(SCS_SIZE - 1));
> > > > >
> > > > > Could we avoid forcing this alignment if we stored the SCS pointer as a
> > > > > (base,offset) pair instead? That might be friendlier on the allocations
> > > > > later on.
> > > >
> > > > The idea is to avoid storing the current task's shadow stack address in
> > > > memory, which is why I would rather not store the base address either.
> > >
> > > What I mean is that, instead of storing the current shadow stack pointer,
> > > we instead store a base and an offset. We can still clear the base, as you
> > > do with the pointer today, and I don't see that the offset is useful to
> > > an attacker on its own.
> >
> > I see what you mean. However, even if we store the base address +
> > the offset, we still need aligned allocation if we want to clear
> > the address. This would basically just move __scs_base() logic to
> > cpu_switch_to() / scs_save().
>
> Okay, so, I feel like this has gotten off into the weeds, or I'm really
> dense (or both). :) Going back to the original comment:
>
> > > > > Could we avoid forcing this alignment if we stored the SCS
> > > > > pointer as a (base,offset) pair instead? That might be friendlier
> > > > > on the allocations later on.
>
> I think there was some confusion about mixing the "we want to be able to
> wipe the value" combined with the masking in __scs_base(). These are
> unrelated, as was correctly observed with "We can still clear the base".

Having just tried to implement this, it turns out they *are* related
and we can't still clear the base, I was wrong about that :( See below.

> What I don't understand here is the suggestion to store two values:
>
> Why is two better than storing one? With one, we only need a single access.
>
> Why would storing the base be "friendlier on the allocations later on"?
> This is coming out of a single kmem cache, in 1K chunks. They will be
> naturally aligned to 1K (unless redzoning has been turned on for some
> slab debugging reason). The base masking is a way to avoid needing to
> store two values, and only happens at task death.

Fair enough about the kmem_cache, although I'm still worried about these
things getting bigger in future and the alignment having to increase at
the same time. We also have a bunch of static/percpu allocations that don't
use this cache.

Also, since you mentioned the lack of redzoning, isn't it a bit dodgy
allocating blindly out of the kmem_cache? It means we don't have a redzone
or a guard page, so if you can trigger something like a recursion bug then
could you scribble past the SCS before the main stack overflows? Would this
clobber somebody else's SCS? The vmap version that I asked Sami to drop
is at least better in this regard, although the guard page is at the wrong
end of the stack and we just hope that the allocation below us didn't pass
VM_NO_GUARD. Looks like the same story for vmap stack :/

> Storing two values eats memory for all tasks for seemingly no meaningful
> common benefit. What am I missing here?

I would like to remove the alignment requirements for the static and percpu
allocations. AFAICT, the only reason the alignment is needed is because you
want to convert an SCS pointer into the base pointer. The only reason *that*
is needed is because of the questionable wiping of the pointer in the
thread_info, but I really don't see the benefit of this. Unlike a crypto
secret (which was your analogy), the SCS pointer is stored in memory in
at least the following situations:

* The task isn't running
* The task is running in userspace
* The task is running a vCPU in KVM
* We're calling into EFI
* On exception entry from EL1, as part of stacking x18
* During CPU suspend

If we split the pointer in two (base, offset) then we could leave the
base live in the thread_info, not require alignment of the stacks (which
may allow for unconditional redzoning?) and then just update the offset
value on context switch, which could be trivially checked as part of the
existing stack overflow checking on kernel entry.

The base and offset can live in the same cacheline and be loaded with ldp,
so I don't see there being an access cost compared to a single variable.
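
For illustration, a rough C sketch of the split (field and helper names here
are only illustrative, and in practice the save/restore would be done in the
context-switch assembly):

	/* in struct thread_info, guarded by CONFIG_SHADOW_CALL_STACK: */
	void *scs_base;			/* can stay live in thread_info */
	unsigned long scs_offset;	/* current SCS pointer, relative to the base */

static inline void scs_save_offset(struct task_struct *tsk, void *scs_sp)
{
	task_thread_info(tsk)->scs_offset =
		(unsigned long)(scs_sp - task_thread_info(tsk)->scs_base);
}

static inline void *scs_load_sp(struct task_struct *tsk)
{
	return task_thread_info(tsk)->scs_base +
	       task_thread_info(tsk)->scs_offset;
}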

Am I missing something (modulo us not agreeing on the utility of wiping
the pointer)?

Will

2020-04-27 16:02:47

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 00/12] add support for Clang's Shadow Call Stack

This patch series adds support for Clang's Shadow Call Stack
(SCS) mitigation, which uses a separately allocated shadow stack
to protect against return address overwrites. More information
can be found here:

https://clang.llvm.org/docs/ShadowCallStack.html

SCS provides better protection against traditional buffer
overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
that SCS security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks in memory, which means an attacker capable of
reading and writing arbitrary memory may be able to locate them
and hijack control flow by modifying the shadow stacks.

SCS is currently supported only on arm64, where the compiler
requires the x18 register to be reserved for holding the current
task's shadow stack pointer.

With -fsanitize=shadow-call-stack, the compiler injects
instructions to all non-leaf C functions to store the return
address to the shadow stack, and unconditionally load it again
before returning. As a result, SCS is incompatible with features
that rely on modifying function return addresses in the kernel
stack to alter control flow. A copy of the return address is
still kept in the kernel stack for compatibility with stack
unwinding, for example.
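
As a rough illustration (assuming typical arm64 code generation; the exact
instruction sequence may differ between compiler versions), only non-leaf
functions are instrumented:

int bar(int x);

/*
 * Non-leaf function: with -fsanitize=shadow-call-stack the compiler
 * typically emits something like
 *
 *     str x30, [x18], #8      // prologue: push LR to the shadow stack
 *     ...
 *     ldr x30, [x18, #-8]!    // epilogue: reload LR before ret
 */
int foo(int x)
{
	return bar(x) + 1;
}

/* Leaf function: makes no calls, so no shadow stack instrumentation. */
int baz(int x)
{
	return x + 1;
}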

SCS has a minimal performance overhead, but allocating
shadow stacks increases kernel memory usage. The feature is
therefore mostly useful on hardware that lacks support for PAC
instructions.

Changes in v13:
- Changed thread_info::shadow_call_stack to a base address and
an offset instead, and removed the now unneeded __scs_base()
and scs_save().
- Removed alignment from the kmem_cache and static allocations.
- Removed the task_set_scs() helper function.
- Moved the assembly code for loading and storing the offset in
thread_info to scs_load/save macros.
- Added offset checking to scs_corrupted().
- Switched to cmpxchg_relaxed() in scs_check_usage().

Changes in v12:
- Removed CONFIG_SHADOW_CALL_STACK_VMAP.
- Added CC_IS_CLANG as a dependency to CONFIG_SHADOW_CALL_STACK.
- Changed SCS_END_MAGIC to use POISON_POINTER_DELTA.
- Removed the unnecessary scs_set_magic() helper function.
- Moved scs_task_reset() and scs_corrupted() to scs.h, along with
__scs_magic() and __scs_base().
- Removed a redundant warning from memory allocation.
- Removed an unnecessary task_set_scs() call from scs_release().
- Changed the accounting code to calculate KiB instead of bytes.
- Replaced the lock in scs_check_usage() with a cmpxchg() loop.

Changes in v11:
- Rebased, added maintainers for kernel/ changes.

Changes in v10:
- Removed an unnecessary <asm/scs.h> include from head.S.

Changes in v9:
- Fixed grammar in the Kconfig help text.
- Changed Kconfig to allow SCS to be selected with the patchable-
function-entry graph tracer.
- Changed the EFI stub patch to not filter out -ffixed-x18, only
SCS flags.

Changes in v8:
- Added __noscs to __hyp_text instead of filtering SCS flags from
the entire arch/arm64/kvm/hyp directory.
- Added a patch to filter out -ffixed-x18 and SCS flags from the
EFI stub.

Changes in v7:
- Changed irq_stack_entry/exit to store the shadow stack pointer
in x24 instead of x20 as kernel_entry uses x20-x23 to store
data that can be used later. Updated the comment as well.
- Changed the Makefile in arch/arm64/kvm/hyp to also filter out
-ffixed-x18.
- Changed SHADOW_CALL_STACK to depend on !FUNCTION_GRAPH_TRACER
instead of not selecting HAVE_FUNCTION_GRAPH_TRACER with SCS.
- Removed ifdefs from the EFI wrapper and updated the comment to
explain why we are restoring x18.
- Rebased as Ard's x18 patches that were part of this series have
already been merged.

Changes in v6:
- Updated comment in the EFI RT wrapper to include the
explanation from the commit message.
- Fixed the SHADOW_CALL_STACK_VMAP config option and the
compilation errors in scs_init_irq()
- Updated the comment in entry.S to Mark's suggestion
- Fixed the WARN_ON in scs_init() to trip only when the return
value for cpuhp_setup_state() is < 0.
- Removed ifdefs from the code in arch/arm64/kernel/scs.c and
added separate shadow stacks for the SDEI handler

Changes in v5:
- Updated the comment in __scs_base() to Mark's suggestion
- Changed all instances of uintptr_t to unsigned long
- Added allocation poisoning for KASAN to catch unintentional
shadow stack accesses; moved scs_set_magic() before poisoning
and switched scs_used() and scs_corrupted() to access the
buffer using READ_ONCE_NOCHECK() instead
- Changed scs_free() to check for NULL instead of zero
- Renamed SCS_CACHE_SIZE to NR_CACHED_SCS
- Added a warning if cpuhp_setup_state fails in scs_init()
- Dropped patches disabling kretprobes after confirming there's
no functional conflict with SCS instrumentation
- Added an explanation to the commit message why function graph
tracing and SCS are incompatible
- Removed the ifdefs from arch/arm64/mm/proc.S and added
comments explaining why we are saving and restoring x18
- Updated scs_check_usage format to include process information

Changes in v4:
- Fixed authorship for Ard's patches
- Added missing commit messages
- Commented code that clears SCS from thread_info
- Added a comment about SCS_END_MAGIC being non-canonical

Changes in v3:
- Switched to filter-out for removing SCS flags in Makefiles
- Changed the __noscs attribute to use __no_sanitize__("...")
instead of no_sanitize("...")
- Cleaned up inline function definitions and moved task_scs()
into a macro
- Cleaned up scs_free() and scs_magic()
- Moved SCS initialization into dup_task_struct() and removed
the now unused scs_task_init()
- Added comments to __scs_base() and scs_task_reset() to better
document design choices
- Changed copy_page to make the offset and bias explicit

Changes in v2:
- Changed Ard's KVM patch to use x29 instead of x18 for the
guest context, which makes restore_callee_saved_regs cleaner
- Updated help text (and commit messages) to point out
differences in security properties compared to user space SCS
- Cleaned up config options: removed the ROP protection choice,
replaced the CC_IS_CLANG dependency with an arch-specific
cc-option test, and moved disabling of incompatible config
options to an arch-specific Kconfig
- Added CC_FLAGS_SCS, which are filtered out where needed
instead of using DISABLE_SCS
- Added a __has_feature guard around __noscs for older clang
versions

Sami Tolvanen (12):
add support for Clang's Shadow Call Stack (SCS)
scs: add accounting
scs: add support for stack usage debugging
scs: disable when function graph tracing is enabled
arm64: reserve x18 from general allocation with SCS
arm64: preserve x18 when CPU is suspended
arm64: efi: restore x18 if it was corrupted
arm64: vdso: disable Shadow Call Stack
arm64: disable SCS for hypervisor code
arm64: implement Shadow Call Stack
arm64: scs: add shadow stacks for SDEI
efi/libstub: disable SCS

Makefile | 6 ++
arch/Kconfig | 25 ++++++
arch/arm64/Kconfig | 5 ++
arch/arm64/Makefile | 4 +
arch/arm64/include/asm/kvm_hyp.h | 2 +-
arch/arm64/include/asm/scs.h | 46 ++++++++++
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/include/asm/thread_info.h | 13 +++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 4 +
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++-
arch/arm64/kernel/entry.S | 38 +++++++-
arch/arm64/kernel/head.S | 6 ++
arch/arm64/kernel/process.c | 2 +
arch/arm64/kernel/scs.c | 20 +++++
arch/arm64/kernel/vdso/Makefile | 2 +-
arch/arm64/mm/proc.S | 14 +++
drivers/base/node.c | 6 ++
drivers/firmware/efi/libstub/Makefile | 3 +
fs/proc/meminfo.c | 4 +
include/linux/compiler-clang.h | 6 ++
include/linux/compiler_types.h | 4 +
include/linux/mmzone.h | 3 +
include/linux/scs.h | 72 +++++++++++++++
init/init_task.c | 8 ++
kernel/Makefile | 1 +
kernel/fork.c | 9 ++
kernel/sched/core.c | 2 +
kernel/scs.c | 121 ++++++++++++++++++++++++++
mm/page_alloc.c | 6 ++
mm/vmstat.c | 3 +
31 files changed, 442 insertions(+), 7 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c


base-commit: 6a8b55ed4056ea5559ebe4f6a4b247f627870d4c
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:03:12

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 04/12] scs: disable when function graph tracing is enabled

With SCS the return address is taken from the shadow stack and the
value in the frame record has no effect. The mcount based graph tracer
hooks returns by modifying frame records on the (regular) stack, and
thus is not compatible. The patchable-function-entry graph tracer
used for DYNAMIC_FTRACE_WITH_REGS modifies the LR before it is saved
to the shadow stack, and is compatible.

Modifying the mcount based graph tracer to work with SCS would require
a mechanism to determine the corresponding slot on the shadow stack
(and to pass this through the ftrace infrastructure), and we expect
that everyone will eventually move to the patchable-function-entry
based graph tracer anyway, so for now let's disable SCS when the
mcount-based graph tracer is enabled.

SCS and patchable-function-entry are both supported from LLVM 10.x.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
---
arch/Kconfig | 1 +
1 file changed, 1 insertion(+)

diff --git a/arch/Kconfig b/arch/Kconfig
index 334a3d9b19df..45dfca9a98d3 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -543,6 +543,7 @@ config ARCH_SUPPORTS_SHADOW_CALL_STACK
config SHADOW_CALL_STACK
bool "Clang Shadow Call Stack"
depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+ depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER
help
This option enables Clang's Shadow Call Stack, which uses a
shadow stack to protect function return addresses from being
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:03:19

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 05/12] arm64: reserve x18 from general allocation with SCS

Reserve the x18 register from general allocation when SCS is enabled,
because the compiler uses the register to store the current task's
shadow stack pointer. Note that all external kernel modules must also be
compiled with -ffixed-x18 if the kernel has SCS enabled.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/Makefile | 4 ++++
1 file changed, 4 insertions(+)

diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 85e4149cc5d5..409a6c1be8cc 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -81,6 +81,10 @@ endif

KBUILD_CFLAGS += $(branch-prot-flags-y)

+ifeq ($(CONFIG_SHADOW_CALL_STACK), y)
+KBUILD_CFLAGS += -ffixed-x18
+endif
+
ifeq ($(CONFIG_CPU_BIG_ENDIAN), y)
KBUILD_CPPFLAGS += -mbig-endian
CHECKFLAGS += -D__AARCH64EB__
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:03:24

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 03/12] scs: add support for stack usage debugging

Implements CONFIG_DEBUG_STACK_USAGE for shadow stacks. When enabled,
also prints out the highest shadow stack usage per process.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
kernel/scs.c | 38 ++++++++++++++++++++++++++++++++++++++
1 file changed, 38 insertions(+)

diff --git a/kernel/scs.c b/kernel/scs.c
index 8769016c714c..2a96573f2b1b 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -68,6 +68,43 @@ int scs_prepare(struct task_struct *tsk, int node)
return 0;
}

+#ifdef CONFIG_DEBUG_STACK_USAGE
+static unsigned long __scs_used(struct task_struct *tsk)
+{
+ unsigned long *p = task_scs(tsk);
+ unsigned long *end = __scs_magic(p);
+ unsigned long s = (unsigned long)p;
+
+ while (p < end && READ_ONCE_NOCHECK(*p))
+ p++;
+
+ return (unsigned long)p - s;
+}
+
+static void scs_check_usage(struct task_struct *tsk)
+{
+ static unsigned long highest;
+ unsigned long used = __scs_used(tsk);
+ unsigned long prev;
+ unsigned long curr = highest;
+
+ while (used > curr) {
+ prev = cmpxchg_relaxed(&highest, curr, used);
+
+ if (prev == curr) {
+ pr_info("%s (%d): highest shadow stack usage: "
+ "%lu bytes\n",
+ tsk->comm, task_pid_nr(tsk), used);
+ break;
+ }
+
+ curr = prev;
+ }
+}
+#else
+static inline void scs_check_usage(struct task_struct *tsk) {}
+#endif
+
void scs_release(struct task_struct *tsk)
{
void *s;
@@ -77,6 +114,7 @@ void scs_release(struct task_struct *tsk)
return;

WARN_ON(scs_corrupted(tsk));
+ scs_check_usage(tsk);

scs_account(tsk, -1);
scs_free(s);
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:04:18

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 11/12] arm64: scs: add shadow stacks for SDEI

This change adds per-CPU shadow call stacks for the SDEI handler.
Similarly to how the kernel stacks are handled, we add separate shadow
stacks for normal and critical events.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: James Morse <[email protected]>
Tested-by: James Morse <[email protected]>
---
arch/arm64/kernel/entry.S | 14 +++++++++++++-
arch/arm64/kernel/scs.c | 5 +++++
2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index 244268d5ae47..cb0516e6f963 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -1049,13 +1049,16 @@ SYM_CODE_START(__sdei_asm_handler)

mov x19, x1

+#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK)
+ ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
+#endif
+
#ifdef CONFIG_VMAP_STACK
/*
* entry.S may have been using sp as a scratch register, find whether
* this is a normal or critical event and switch to the appropriate
* stack for this CPU.
*/
- ldrb w4, [x19, #SDEI_EVENT_PRIORITY]
cbnz w4, 1f
ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6
b 2f
@@ -1065,6 +1068,15 @@ SYM_CODE_START(__sdei_asm_handler)
mov sp, x5
#endif

+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* Use a separate shadow call stack for normal and critical events */
+ cbnz w4, 3f
+ adr_this_cpu dst=x18, sym=sdei_shadow_call_stack_normal, tmp=x6
+ b 4f
+3: adr_this_cpu dst=x18, sym=sdei_shadow_call_stack_critical, tmp=x6
+4:
+#endif
+
/*
* We may have interrupted userspace, or a guest, or exit-from or
* return-to either of these. We can't trust sp_el0, restore it.
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
index acc6741d1a40..adc97f826fab 100644
--- a/arch/arm64/kernel/scs.c
+++ b/arch/arm64/kernel/scs.c
@@ -13,3 +13,8 @@
DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \

DEFINE_SCS(irq_shadow_call_stack);
+
+#ifdef CONFIG_ARM_SDE_INTERFACE
+DEFINE_SCS(sdei_shadow_call_stack_normal);
+DEFINE_SCS(sdei_shadow_call_stack_critical);
+#endif
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:04:20

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 10/12] arm64: implement Shadow Call Stack

This change implements shadow stack switching, initial SCS set-up,
and interrupt shadow stacks for arm64.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
---
arch/arm64/Kconfig | 5 +++
arch/arm64/include/asm/scs.h | 46 ++++++++++++++++++++++++++++
arch/arm64/include/asm/thread_info.h | 13 ++++++++
arch/arm64/kernel/Makefile | 1 +
arch/arm64/kernel/asm-offsets.c | 4 +++
arch/arm64/kernel/entry.S | 24 +++++++++++++--
arch/arm64/kernel/head.S | 6 ++++
arch/arm64/kernel/process.c | 2 ++
arch/arm64/kernel/scs.c | 15 +++++++++
9 files changed, 114 insertions(+), 2 deletions(-)
create mode 100644 arch/arm64/include/asm/scs.h
create mode 100644 arch/arm64/kernel/scs.c

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 40fb05d96c60..c380a16533f6 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -64,6 +64,7 @@ config ARM64
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_SUPPORTS_MEMORY_FAILURE
+ select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -1025,6 +1026,10 @@ config ARCH_HAS_CACHE_LINE_SIZE
config ARCH_ENABLE_SPLIT_PMD_PTLOCK
def_bool y if PGTABLE_LEVELS > 2

+# Supported by clang >= 7.0
+config CC_HAVE_SHADOW_CALL_STACK
+ def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
+
config SECCOMP
bool "Enable seccomp to safely compute untrusted bytecode"
---help---
diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h
new file mode 100644
index 000000000000..96549353b0cb
--- /dev/null
+++ b/arch/arm64/include/asm/scs.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_SCS_H
+#define _ASM_SCS_H
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ .macro scs_load tsk, tmp
+ ldp x18, \tmp, [\tsk, #TSK_TI_SCS_BASE]
+ add x18, x18, \tmp
+ .endm
+
+ .macro scs_save tsk, tmp
+ ldr \tmp, [\tsk, #TSK_TI_SCS_BASE]
+ sub \tmp, x18, \tmp
+ str \tmp, [\tsk, #TSK_TI_SCS_OFFSET]
+ .endm
+#else
+ .macro scs_load tsk, tmp
+ .endm
+
+ .macro scs_save tsk, tmp
+ .endm
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#else /* __ASSEMBLY__ */
+
+#include <linux/scs.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+static inline void scs_overflow_check(struct task_struct *tsk)
+{
+ if (unlikely(scs_corrupted(tsk)))
+ panic("corrupted shadow stack detected inside scheduler\n");
+}
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+static inline void scs_overflow_check(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_SCS_H */
diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h
index 512174a8e789..9df79c0a4c43 100644
--- a/arch/arm64/include/asm/thread_info.h
+++ b/arch/arm64/include/asm/thread_info.h
@@ -41,6 +41,10 @@ struct thread_info {
#endif
} preempt;
};
+#ifdef CONFIG_SHADOW_CALL_STACK
+ void *scs_base;
+ unsigned long scs_offset;
+#endif
};

#define thread_saved_pc(tsk) \
@@ -100,11 +104,20 @@ void arch_release_task_struct(struct task_struct *tsk);
_TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \
_TIF_SYSCALL_EMU)

+#ifdef CONFIG_SHADOW_CALL_STACK
+#define INIT_SCS \
+ .scs_base = init_shadow_call_stack, \
+ .scs_offset = 0,
+#else
+#define INIT_SCS
+#endif
+
#define INIT_THREAD_INFO(tsk) \
{ \
.flags = _TIF_FOREIGN_FPSTATE, \
.preempt_count = INIT_PREEMPT_COUNT, \
.addr_limit = KERNEL_DS, \
+ INIT_SCS \
}

#endif /* __ASM_THREAD_INFO_H */
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile
index 4e5b8ee31442..151f28521f1e 100644
--- a/arch/arm64/kernel/Makefile
+++ b/arch/arm64/kernel/Makefile
@@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o
obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o
obj-$(CONFIG_ARM64_SSBD) += ssbd.o
obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-y += vdso/ probes/
obj-$(CONFIG_COMPAT_VDSO) += vdso32/
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 9981a0a5a87f..d7934250b68c 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -33,6 +33,10 @@ int main(void)
DEFINE(TSK_TI_ADDR_LIMIT, offsetof(struct task_struct, thread_info.addr_limit));
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
+#endif
+#ifdef CONFIG_SHADOW_CALL_STACK
+ DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base));
+ DEFINE(TSK_TI_SCS_OFFSET, offsetof(struct task_struct, thread_info.scs_offset));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
#ifdef CONFIG_STACKPROTECTOR
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ddcde093c433..244268d5ae47 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -23,6 +23,7 @@
#include <asm/mmu.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
+#include <asm/scs.h>
#include <asm/thread_info.h>
#include <asm/asm-uaccess.h>
#include <asm/unistd.h>
@@ -179,6 +180,8 @@ alternative_cb_end
apply_ssbd 1, x22, x23

ptrauth_keys_install_kernel tsk, 1, x20, x22, x23
+
+ scs_load tsk, x20
.else
add x21, sp, #S_FRAME_SIZE
get_current_task tsk
@@ -343,6 +346,8 @@ alternative_else_nop_endif
msr cntkctl_el1, x1
4:
#endif
+ scs_save tsk, x0
+
/* No kernel C function calls after this as user keys are set. */
ptrauth_keys_install_user tsk, x0, x1, x2

@@ -388,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

.macro irq_stack_entry
mov x19, sp // preserve the original sp
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x24, x18 // preserve the original shadow stack
+#endif

/*
* Compare sp with the base of the task stack.
@@ -405,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0

/* switch to the irq stack */
mov sp, x26
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+ /* also switch to the irq shadow stack */
+ adr_this_cpu x18, irq_shadow_call_stack, x26
+#endif
+
9998:
.endm

/*
- * x19 should be preserved between irq_stack_entry and
- * irq_stack_exit.
+ * The callee-saved regs (x19-x29) should be preserved between
+ * irq_stack_entry and irq_stack_exit, but note that kernel_entry
+ * uses x20-x23 to store data for later use.
*/
.macro irq_stack_exit
mov sp, x19
+#ifdef CONFIG_SHADOW_CALL_STACK
+ mov x18, x24
+#endif
.endm

/* GPRs used by entry code */
@@ -901,6 +919,8 @@ SYM_FUNC_START(cpu_switch_to)
mov sp, x9
msr sp_el0, x1
ptrauth_keys_install_kernel x1, 1, x8, x9, x10
+ scs_save x0, x8
+ scs_load x1, x8
ret
SYM_FUNC_END(cpu_switch_to)
NOKPROBE(cpu_switch_to)
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 57a91032b4c2..2b01c19c5483 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -27,6 +27,7 @@
#include <asm/pgtable-hwdef.h>
#include <asm/pgtable.h>
#include <asm/page.h>
+#include <asm/scs.h>
#include <asm/smp.h>
#include <asm/sysreg.h>
#include <asm/thread_info.h>
@@ -424,6 +425,10 @@ SYM_FUNC_START_LOCAL(__primary_switched)
stp xzr, x30, [sp, #-16]!
mov x29, sp

+#ifdef CONFIG_SHADOW_CALL_STACK
+ adr_l x18, init_shadow_call_stack // Set shadow call stack
+#endif
+
str_l x21, __fdt_pointer, x5 // Save FDT pointer

ldr_l x4, kimage_vaddr // Save the offset between
@@ -737,6 +742,7 @@ SYM_FUNC_START_LOCAL(__secondary_switched)
ldr x2, [x0, #CPU_BOOT_TASK]
cbz x2, __secondary_too_slow
msr sp_el0, x2
+ scs_load x2, x3
mov x29, #0
mov x30, #0
b secondary_start_kernel
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index 56be4cbf771f..a35d3318492c 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -52,6 +52,7 @@
#include <asm/mmu_context.h>
#include <asm/processor.h>
#include <asm/pointer_auth.h>
+#include <asm/scs.h>
#include <asm/stacktrace.h>

#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
@@ -515,6 +516,7 @@ __notrace_funcgraph struct task_struct *__switch_to(struct task_struct *prev,
entry_task_switch(next);
uao_thread_switch(next);
ssbs_thread_switch(next);
+ scs_overflow_check(next);

/*
* Complete any pending TLB or cache maintenance on this CPU in case
diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c
new file mode 100644
index 000000000000..acc6741d1a40
--- /dev/null
+++ b/arch/arm64/kernel/scs.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/percpu.h>
+#include <asm/scs.h>
+
+/* Allocate a static per-CPU shadow stack */
+#define DEFINE_SCS(name) \
+ DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
+
+DEFINE_SCS(irq_shadow_call_stack);
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:04:49

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 08/12] arm64: vdso: disable Shadow Call Stack

Shadow stacks are only available in the kernel, so disable SCS
instrumentation for the vDSO.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/vdso/Makefile | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile
index dd2514bb1511..a87a4f11724e 100644
--- a/arch/arm64/kernel/vdso/Makefile
+++ b/arch/arm64/kernel/vdso/Makefile
@@ -25,7 +25,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING

VDSO_LDFLAGS := -Bsymbolic

-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS)
KBUILD_CFLAGS += $(DISABLE_LTO)
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:04:51

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 02/12] scs: add accounting

This change adds accounting for the memory allocated for shadow stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
drivers/base/node.c | 6 ++++++
fs/proc/meminfo.c | 4 ++++
include/linux/mmzone.h | 3 +++
kernel/scs.c | 15 +++++++++++++++
mm/page_alloc.c | 6 ++++++
mm/vmstat.c | 3 +++
6 files changed, 37 insertions(+)

diff --git a/drivers/base/node.c b/drivers/base/node.c
index 10d7e818e118..50b8c0d43859 100644
--- a/drivers/base/node.c
+++ b/drivers/base/node.c
@@ -415,6 +415,9 @@ static ssize_t node_read_meminfo(struct device *dev,
"Node %d AnonPages: %8lu kB\n"
"Node %d Shmem: %8lu kB\n"
"Node %d KernelStack: %8lu kB\n"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ "Node %d ShadowCallStack:%8lu kB\n"
+#endif
"Node %d PageTables: %8lu kB\n"
"Node %d NFS_Unstable: %8lu kB\n"
"Node %d Bounce: %8lu kB\n"
@@ -438,6 +441,9 @@ static ssize_t node_read_meminfo(struct device *dev,
nid, K(node_page_state(pgdat, NR_ANON_MAPPED)),
nid, K(i.sharedram),
nid, sum_zone_node_page_state(nid, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ nid, sum_zone_node_page_state(nid, NR_KERNEL_SCS_KB),
+#endif
nid, K(sum_zone_node_page_state(nid, NR_PAGETABLE)),
nid, K(node_page_state(pgdat, NR_UNSTABLE_NFS)),
nid, K(sum_zone_node_page_state(nid, NR_BOUNCE)),
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8c1f1bb1a5ce..09cd51c8d23d 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -103,6 +103,10 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "SUnreclaim: ", sunreclaim);
seq_printf(m, "KernelStack: %8lu kB\n",
global_zone_page_state(NR_KERNEL_STACK_KB));
+#ifdef CONFIG_SHADOW_CALL_STACK
+ seq_printf(m, "ShadowCallStack:%8lu kB\n",
+ global_zone_page_state(NR_KERNEL_SCS_KB));
+#endif
show_val_kb(m, "PageTables: ",
global_zone_page_state(NR_PAGETABLE));

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b9de7d220fb..acffc3bc6178 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -156,6 +156,9 @@ enum zone_stat_item {
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
NR_PAGETABLE, /* used for pagetables */
NR_KERNEL_STACK_KB, /* measured in KiB */
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ NR_KERNEL_SCS_KB, /* measured in KiB */
+#endif
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
diff --git a/kernel/scs.c b/kernel/scs.c
index 43624be9ad90..8769016c714c 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -6,8 +6,10 @@
*/

#include <linux/kasan.h>
+#include <linux/mm.h>
#include <linux/scs.h>
#include <linux/slab.h>
+#include <linux/vmstat.h>
#include <asm/scs.h>

static struct kmem_cache *scs_cache;
@@ -40,6 +42,17 @@ void __init scs_init(void)
scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL);
}

+static struct page *__scs_page(struct task_struct *tsk)
+{
+ return virt_to_page(task_scs(tsk));
+}
+
+static void scs_account(struct task_struct *tsk, int account)
+{
+ mod_zone_page_state(page_zone(__scs_page(tsk)), NR_KERNEL_SCS_KB,
+ account * (SCS_SIZE / 1024));
+}
+
int scs_prepare(struct task_struct *tsk, int node)
{
void *s;
@@ -50,6 +63,7 @@ int scs_prepare(struct task_struct *tsk, int node)

task_scs(tsk) = s;
task_scs_offset(tsk) = 0;
+ scs_account(tsk, 1);

return 0;
}
@@ -64,5 +78,6 @@ void scs_release(struct task_struct *tsk)

WARN_ON(scs_corrupted(tsk));

+ scs_account(tsk, -1);
scs_free(s);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 69827d4fa052..83743d7a6177 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5411,6 +5411,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
" managed:%lukB"
" mlocked:%lukB"
" kernel_stack:%lukB"
+#ifdef CONFIG_SHADOW_CALL_STACK
+ " shadow_call_stack:%lukB"
+#endif
" pagetables:%lukB"
" bounce:%lukB"
" free_pcp:%lukB"
@@ -5433,6 +5436,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
K(zone_managed_pages(zone)),
K(zone_page_state(zone, NR_MLOCK)),
zone_page_state(zone, NR_KERNEL_STACK_KB),
+#ifdef CONFIG_SHADOW_CALL_STACK
+ zone_page_state(zone, NR_KERNEL_SCS_KB),
+#endif
K(zone_page_state(zone, NR_PAGETABLE)),
K(zone_page_state(zone, NR_BOUNCE)),
K(free_pcp),
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 96d21a792b57..2435d2c24657 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1119,6 +1119,9 @@ const char * const vmstat_text[] = {
"nr_mlock",
"nr_page_table_pages",
"nr_kernel_stack",
+#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
+ "nr_shadow_call_stack",
+#endif
"nr_bounce",
#if IS_ENABLED(CONFIG_ZSMALLOC)
"nr_zspages",
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:05:04

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 07/12] arm64: efi: restore x18 if it was corrupted

If we detect a corrupted x18, restore the register before jumping back
to potentially SCS instrumented code. This is safe, because the wrapper
is called with preemption disabled and a separate shadow stack is used
for interrupt handling.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/kernel/efi-rt-wrapper.S | 11 ++++++++++-
1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S
index 3fc71106cb2b..6ca6c0dc11a1 100644
--- a/arch/arm64/kernel/efi-rt-wrapper.S
+++ b/arch/arm64/kernel/efi-rt-wrapper.S
@@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper)
ldp x29, x30, [sp], #32
b.ne 0f
ret
-0: b efi_handle_corrupted_x18 // tail call
+0:
+ /*
+ * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a
+ * shadow stack pointer, which we need to restore before returning to
+ * potentially instrumented code. This is safe because the wrapper is
+ * called with preemption disabled and a separate shadow stack is used
+ * for interrupts.
+ */
+ mov x18, x2
+ b efi_handle_corrupted_x18 // tail call
ENDPROC(__efi_rt_asm_wrapper)
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:05:28

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 01/12] add support for Clang's Shadow Call Stack (SCS)

This change adds generic support for Clang's Shadow Call Stack,
which uses a shadow stack to protect return addresses from being
overwritten by an attacker. Details are available here:

https://clang.llvm.org/docs/ShadowCallStack.html

Note that security guarantees in the kernel differ from the ones
documented for user space. The kernel must store addresses of
shadow stacks in memory, which means an attacker capable of reading
and writing arbitrary memory may be able to locate them and hijack
control flow by modifying the stacks.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Miguel Ojeda <[email protected]>
---
Makefile | 6 +++
arch/Kconfig | 24 ++++++++++++
include/linux/compiler-clang.h | 6 +++
include/linux/compiler_types.h | 4 ++
include/linux/scs.h | 72 ++++++++++++++++++++++++++++++++++
init/init_task.c | 8 ++++
kernel/Makefile | 1 +
kernel/fork.c | 9 +++++
kernel/sched/core.c | 2 +
kernel/scs.c | 68 ++++++++++++++++++++++++++++++++
10 files changed, 200 insertions(+)
create mode 100644 include/linux/scs.h
create mode 100644 kernel/scs.c

diff --git a/Makefile b/Makefile
index 679f302a8b8b..33dc0d0cdd08 100644
--- a/Makefile
+++ b/Makefile
@@ -866,6 +866,12 @@ ifdef CONFIG_LIVEPATCH
KBUILD_CFLAGS += $(call cc-option, -flive-patching=inline-clone)
endif

+ifdef CONFIG_SHADOW_CALL_STACK
+CC_FLAGS_SCS := -fsanitize=shadow-call-stack
+KBUILD_CFLAGS += $(CC_FLAGS_SCS)
+export CC_FLAGS_SCS
+endif
+
# arch Makefile may override CC so keep this after arch Makefile is included
NOSTDINC_FLAGS += -nostdinc -isystem $(shell $(CC) -print-file-name=include)

diff --git a/arch/Kconfig b/arch/Kconfig
index 786a85d4ad40..334a3d9b19df 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -533,6 +533,30 @@ config STACKPROTECTOR_STRONG
about 20% of all kernel functions, which increases the kernel code
size by about 2%.

+config ARCH_SUPPORTS_SHADOW_CALL_STACK
+ bool
+ help
+ An architecture should select this if it supports Clang's Shadow
+ Call Stack, has asm/scs.h, and implements runtime support for shadow
+ stack switching.
+
+config SHADOW_CALL_STACK
+ bool "Clang Shadow Call Stack"
+ depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK
+ help
+ This option enables Clang's Shadow Call Stack, which uses a
+ shadow stack to protect function return addresses from being
+ overwritten by an attacker. More information can be found in
+ Clang's documentation:
+
+ https://clang.llvm.org/docs/ShadowCallStack.html
+
+ Note that security guarantees in the kernel differ from the
+ ones documented for user space. The kernel must store addresses
+ of shadow stacks in memory, which means an attacker capable of
+ reading and writing arbitrary memory may be able to locate them
+ and hijack control flow by modifying the stacks.
+
config HAVE_ARCH_WITHIN_STACK_FRAMES
bool
help
diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 333a6695a918..18fc4d29ef27 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -42,3 +42,9 @@
* compilers, like ICC.
*/
#define barrier() __asm__ __volatile__("" : : : "memory")
+
+#if __has_feature(shadow_call_stack)
+# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
+#else
+# define __noscs
+#endif
diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
index e970f97a7fcb..97b62f47a80d 100644
--- a/include/linux/compiler_types.h
+++ b/include/linux/compiler_types.h
@@ -193,6 +193,10 @@ struct ftrace_likely_data {
# define randomized_struct_fields_end
#endif

+#ifndef __noscs
+# define __noscs
+#endif
+
#ifndef asm_volatile_goto
#define asm_volatile_goto(x...) asm goto(x)
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
new file mode 100644
index 000000000000..060eeb3d1390
--- /dev/null
+++ b/include/linux/scs.h
@@ -0,0 +1,72 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#ifndef _LINUX_SCS_H
+#define _LINUX_SCS_H
+
+#include <linux/gfp.h>
+#include <linux/poison.h>
+#include <linux/sched.h>
+#include <asm/page.h>
+
+#ifdef CONFIG_SHADOW_CALL_STACK
+
+/*
+ * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
+ * architecture) provided ~40% safety margin on stack usage while keeping
+ * memory allocation overhead reasonable.
+ */
+#define SCS_SIZE 1024UL
+#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
+
+/* An illegal pointer value to mark the end of the shadow stack. */
+#define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA)
+
+#define task_scs(tsk) (task_thread_info(tsk)->scs_base)
+#define task_scs_offset(tsk) (task_thread_info(tsk)->scs_offset)
+
+extern void scs_init(void);
+
+static inline void scs_task_reset(struct task_struct *tsk)
+{
+ /*
+ * Reset the shadow stack to the base address in case the task
+ * is reused.
+ */
+ task_scs_offset(tsk) = 0;
+}
+
+extern int scs_prepare(struct task_struct *tsk, int node);
+
+static inline unsigned long *__scs_magic(void *s)
+{
+ return (unsigned long *)(s + SCS_SIZE) - 1;
+}
+
+static inline bool scs_corrupted(struct task_struct *tsk)
+{
+ unsigned long *magic = __scs_magic(task_scs(tsk));
+
+ return (task_scs_offset(tsk) >= SCS_SIZE - 1 ||
+ READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC);
+}
+
+extern void scs_release(struct task_struct *tsk);
+
+#else /* CONFIG_SHADOW_CALL_STACK */
+
+#define task_scs(tsk) NULL
+
+static inline void scs_init(void) {}
+static inline void scs_task_reset(struct task_struct *tsk) {}
+static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
+static inline bool scs_corrupted(struct task_struct *tsk) { return false; }
+static inline void scs_release(struct task_struct *tsk) {}
+
+#endif /* CONFIG_SHADOW_CALL_STACK */
+
+#endif /* _LINUX_SCS_H */
diff --git a/init/init_task.c b/init/init_task.c
index bd403ed3e418..169e34066d35 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -11,6 +11,7 @@
#include <linux/mm.h>
#include <linux/audit.h>
#include <linux/numa.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <linux/uaccess.h>
@@ -50,6 +51,13 @@ static struct sighand_struct init_sighand = {
.signalfd_wqh = __WAIT_QUEUE_HEAD_INITIALIZER(init_sighand.signalfd_wqh),
};

+#ifdef CONFIG_SHADOW_CALL_STACK
+unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)]
+ __init_task_data = {
+ [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC
+};
+#endif
+
/*
* Set up the first task table, touch at your own risk!. Base=0,
* limit=0x1fffff (=2MB)
diff --git a/kernel/Makefile b/kernel/Makefile
index 4cb4130ced32..c332eb9d4841 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -103,6 +103,7 @@ obj-$(CONFIG_TRACEPOINTS) += trace/
obj-$(CONFIG_IRQ_WORK) += irq_work.o
obj-$(CONFIG_CPU_PM) += cpu_pm.o
obj-$(CONFIG_BPF) += bpf/
+obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o

obj-$(CONFIG_PERF_EVENTS) += events/

diff --git a/kernel/fork.c b/kernel/fork.c
index 8c700f881d92..f6339f9d232d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -94,6 +94,7 @@
#include <linux/thread_info.h>
#include <linux/stackleak.h>
#include <linux/kasan.h>
+#include <linux/scs.h>

#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -456,6 +457,8 @@ void put_task_stack(struct task_struct *tsk)

void free_task(struct task_struct *tsk)
{
+ scs_release(tsk);
+
#ifndef CONFIG_THREAD_INFO_IN_TASK
/*
* The task is finally done with both the stack and thread_info,
@@ -840,6 +843,8 @@ void __init fork_init(void)
NULL, free_vm_stack_cache);
#endif

+ scs_init();
+
lockdep_init_task(&init_task);
uprobes_init();
}
@@ -899,6 +904,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
if (err)
goto free_stack;

+ err = scs_prepare(tsk, node);
+ if (err)
+ goto free_stack;
+
#ifdef CONFIG_SECCOMP
/*
* We must handle setting up seccomp filters once we're under
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 9a2fbf98fd6f..934e03cfaec7 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -11,6 +11,7 @@
#include <linux/nospec.h>

#include <linux/kcov.h>
+#include <linux/scs.h>

#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -6040,6 +6041,7 @@ void init_idle(struct task_struct *idle, int cpu)
idle->se.exec_start = sched_clock();
idle->flags |= PF_IDLE;

+ scs_task_reset(idle);
kasan_unpoison_task_stack(idle);

#ifdef CONFIG_SMP
diff --git a/kernel/scs.c b/kernel/scs.c
new file mode 100644
index 000000000000..43624be9ad90
--- /dev/null
+++ b/kernel/scs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Shadow Call Stack support.
+ *
+ * Copyright (C) 2019 Google LLC
+ */
+
+#include <linux/kasan.h>
+#include <linux/scs.h>
+#include <linux/slab.h>
+#include <asm/scs.h>
+
+static struct kmem_cache *scs_cache;
+
+static void *scs_alloc(int node)
+{
+ void *s;
+
+ s = kmem_cache_alloc_node(scs_cache, GFP_SCS, node);
+ if (s) {
+ *__scs_magic(s) = SCS_END_MAGIC;
+ /*
+ * Poison the allocation to catch unintentional accesses to
+ * the shadow stack when KASAN is enabled.
+ */
+ kasan_poison_object_data(scs_cache, s);
+ }
+
+ return s;
+}
+
+static void scs_free(void *s)
+{
+ kasan_unpoison_object_data(scs_cache, s);
+ kmem_cache_free(scs_cache, s);
+}
+
+void __init scs_init(void)
+{
+ scs_cache = kmem_cache_create("scs_cache", SCS_SIZE, 0, 0, NULL);
+}
+
+int scs_prepare(struct task_struct *tsk, int node)
+{
+ void *s;
+
+ s = scs_alloc(node);
+ if (!s)
+ return -ENOMEM;
+
+ task_scs(tsk) = s;
+ task_scs_offset(tsk) = 0;
+
+ return 0;
+}
+
+void scs_release(struct task_struct *tsk)
+{
+ void *s;
+
+ s = task_scs(tsk);
+ if (!s)
+ return;
+
+ WARN_ON(scs_corrupted(tsk));
+
+ scs_free(s);
+}
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:05:49

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 06/12] arm64: preserve x18 when CPU is suspended

Don't lose the current task's shadow stack when the CPU is suspended.

Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Nick Desaulniers <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Reviewed-by: Mark Rutland <[email protected]>
Acked-by: Will Deacon <[email protected]>
---
arch/arm64/include/asm/suspend.h | 2 +-
arch/arm64/mm/proc.S | 14 ++++++++++++++
2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h
index 8939c87c4dce..0cde2f473971 100644
--- a/arch/arm64/include/asm/suspend.h
+++ b/arch/arm64/include/asm/suspend.h
@@ -2,7 +2,7 @@
#ifndef __ASM_SUSPEND_H
#define __ASM_SUSPEND_H

-#define NR_CTX_REGS 12
+#define NR_CTX_REGS 13
#define NR_CALLEE_SAVED_REGS 12

/*
diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
index 197a9ba2d5ea..ed15be0f8103 100644
--- a/arch/arm64/mm/proc.S
+++ b/arch/arm64/mm/proc.S
@@ -58,6 +58,8 @@
* cpu_do_suspend - save CPU registers context
*
* x0: virtual address of context pointer
+ *
+ * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>.
*/
SYM_FUNC_START(cpu_do_suspend)
mrs x2, tpidr_el0
@@ -82,6 +84,11 @@ alternative_endif
stp x8, x9, [x0, #48]
stp x10, x11, [x0, #64]
stp x12, x13, [x0, #80]
+ /*
+ * Save x18 as it may be used as a platform register, e.g. by shadow
+ * call stack.
+ */
+ str x18, [x0, #96]
ret
SYM_FUNC_END(cpu_do_suspend)

@@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume)
ldp x9, x10, [x0, #48]
ldp x11, x12, [x0, #64]
ldp x13, x14, [x0, #80]
+ /*
+ * Restore x18, as it may be used as a platform register, and clear
+ * the buffer to minimize the risk of exposure when used for shadow
+ * call stack.
+ */
+ ldr x18, [x0, #96]
+ str xzr, [x0, #96]
msr tpidr_el0, x2
msr tpidrro_el0, x3
msr contextidr_el1, x4
--
2.26.2.303.gf8c07b1a785-goog
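
For reference, the context buffer written above is struct cpu_suspend_ctx from
<asm/suspend.h>. As a sketch (recalled from the arm64 header rather than taken
from a hunk in this series), it looks roughly like:

struct cpu_suspend_ctx {
        /*
         * This struct must be kept in sync with
         * cpu_do_{suspend,resume} in mm/proc.S.
         */
        u64 ctx_regs[NR_CTX_REGS];
        u64 sp;
} __aligned(16);

With NR_CTX_REGS bumped from 12 to 13, x18 occupies the new last slot at
offset 12 * 8 = 96 bytes, which is why the str/ldr above address [x0, #96].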

2020-04-27 16:05:56

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 12/12] efi/libstub: disable SCS

Shadow stacks are not available in the EFI stub, so filter out SCS flags.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Ard Biesheuvel <[email protected]>
---
drivers/firmware/efi/libstub/Makefile | 3 +++
1 file changed, 3 insertions(+)

diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 094eabdecfe6..b52ae8c29560 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -32,6 +32,9 @@ KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
$(call cc-option,-fno-stack-protector) \
-D__DISABLE_EXPORTS

+# remove SCS flags from all objects in this directory
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+
GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
--
2.26.2.303.gf8c07b1a785-goog
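
For context, CC_FLAGS_SCS is assumed here to carry the Clang instrumentation
flag (-fsanitize=shadow-call-stack), exported from the top-level Makefile when
CONFIG_SHADOW_CALL_STACK is enabled. Filtering it out of KBUILD_CFLAGS builds
every object in the stub without shadow stack instrumentation, alongside the
GCOV/KASAN/UBSAN opt-outs that follow in the same Makefile.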

2020-04-27 16:06:40

by Sami Tolvanen

[permalink] [raw]
Subject: [PATCH v13 09/12] arm64: disable SCS for hypervisor code

Disable SCS for code that runs at a different exception level by
adding __noscs to __hyp_text.

Suggested-by: James Morse <[email protected]>
Signed-off-by: Sami Tolvanen <[email protected]>
Reviewed-by: Kees Cook <[email protected]>
Acked-by: Marc Zyngier <[email protected]>
---
arch/arm64/include/asm/kvm_hyp.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h
index fe57f60f06a8..875b106c5d98 100644
--- a/arch/arm64/include/asm/kvm_hyp.h
+++ b/arch/arm64/include/asm/kvm_hyp.h
@@ -13,7 +13,7 @@
#include <asm/kvm_mmu.h>
#include <asm/sysreg.h>

-#define __hyp_text __section(.hyp.text) notrace
+#define __hyp_text __section(.hyp.text) notrace __noscs

#define read_sysreg_elx(r,nvh,vh) \
({ \
--
2.26.2.303.gf8c07b1a785-goog

2020-04-27 16:53:50

by Miguel Ojeda

[permalink] [raw]
Subject: Re: [PATCH v13 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, Apr 27, 2020 at 6:00 PM Sami Tolvanen <[email protected]> wrote:
>
> diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
> index 333a6695a918..18fc4d29ef27 100644
> --- a/include/linux/compiler-clang.h
> +++ b/include/linux/compiler-clang.h
> @@ -42,3 +42,9 @@
> * compilers, like ICC.
> */
> #define barrier() __asm__ __volatile__("" : : : "memory")
> +
> +#if __has_feature(shadow_call_stack)
> +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
> +#else
> +# define __noscs
> +#endif

Can we remove the `#else` branch? compiler_types.h [*] has to care
about that case for other compilers anyway, no?

> diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h
> index e970f97a7fcb..97b62f47a80d 100644
> --- a/include/linux/compiler_types.h
> +++ b/include/linux/compiler_types.h
> @@ -193,6 +193,10 @@ struct ftrace_likely_data {
> # define randomized_struct_fields_end
> #endif
>
> +#ifndef __noscs
> +# define __noscs
> +#endif

[*] Here

Cheers,
Miguel

2020-04-27 17:04:56

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v13 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, Apr 27, 2020 at 06:48:49PM +0200, Miguel Ojeda wrote:
> On Mon, Apr 27, 2020 at 6:00 PM Sami Tolvanen <[email protected]> wrote:
> >
> > diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
> > index 333a6695a918..18fc4d29ef27 100644
> > --- a/include/linux/compiler-clang.h
> > +++ b/include/linux/compiler-clang.h
> > @@ -42,3 +42,9 @@
> > * compilers, like ICC.
> > */
> > #define barrier() __asm__ __volatile__("" : : : "memory")
> > +
> > +#if __has_feature(shadow_call_stack)
> > +# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
> > +#else
> > +# define __noscs
> > +#endif
>
> Can we remove the `#else` branch? compiler_types.h [*] has to care
> about that case for other compilers anyway, no?

Yes, it's unnecessary. I'll remove this in the next version. Thanks!

Sami

2020-04-27 17:43:52

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v13 00/12] add support for Clang's Shadow Call Stack

On Mon, 27 Apr 2020 at 18:00, Sami Tolvanen <[email protected]> wrote:
>
> This patch series adds support for Clang's Shadow Call Stack
> (SCS) mitigation, which uses a separately allocated shadow stack
> to protect against return address overwrites. More information
> can be found here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html
>
> SCS provides better protection against traditional buffer
> overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
> that SCS security guarantees in the kernel differ from the ones
> documented for user space. The kernel must store addresses of
> shadow stacks in memory, which means an attacker capable of
> reading and writing arbitrary memory may be able to locate them
> and hijack control flow by modifying the shadow stacks.
>
> SCS is currently supported only on arm64, where the compiler
> requires the x18 register to be reserved for holding the current
> task's shadow stack pointer.
>
> With -fsanitize=shadow-call-stack, the compiler injects
> instructions to all non-leaf C functions to store the return
> address to the shadow stack, and unconditionally load it again
> before returning. As a result, SCS is incompatible with features
> that rely on modifying function return addresses in the kernel
> stack to alter control flow. A copy of the return address is
> still kept in the kernel stack for compatibility with stack
> unwinding, for example.
>
> SCS has a minimal performance overhead, but allocating
> shadow stacks increases kernel memory usage. The feature is
> therefore mostly useful on hardware that lacks support for PAC
> instructions.
>
> Changes in v13:
> - Changed thread_info::shadow_call_stack to a base address and
> an offset instead, and removed the now unneeded __scs_base()
> and scs_save().
> - Removed alignment from the kmem_cache and static allocations.
> - Removed the task_set_scs() helper function.
> - Moved the assembly code for loading and storing the offset in
> thread_info to scs_load/save macros.
> - Added offset checking to scs_corrupted().
> - Switched to cmpxchg_relaxed() in scs_check_usage().
>

OK, so one thing that came up in an offline discussion about SCS is
the way it interacts with the vmap'ed stack.

The vmap'ed stack is great for robustness, but it only works if things
don't explode for other reasons in the mean time. This means the
ordinary-to-shadow-call-stack size ratio should be chosen such that it
is *really* unlikely you could ever overflow the shadow call stack and
corrupt another task's call stack before hitting the vmap stack's
guard region.

Alternatively, I wonder if there is a way we could let the SCS and
ordinary stack share [the bottom of] the vmap'ed region. That would
give rather nasty results if the ordinary stack overflows into the
SCS, but for cases where we really recurse out of control, we could
catch this occurrence on either stack, whichever one occurs first. And
the nastiness -when it does occur- will not corrupt any state beyond
the stack of the current task.
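
As a rough sanity check of that ratio (assuming arm64's default 16 KB
THREAD_SIZE and the 1 KB SCS_SIZE used in this series): the shadow stack
stores one 8-byte return address per non-leaf frame, so 1 KB covers 128
frames, and 128 frames spread over a 16 KB ordinary stack average 128 bytes
each. Runaway recursion would therefore only exhaust the SCS before hitting
the vmap'ed stack's guard page if call frames averaged less than roughly
128 bytes.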

2020-04-27 20:48:12

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Fri, Apr 24, 2020 at 12:21:14PM +0100, Will Deacon wrote:
> Also, since you mentioned the lack of redzoning, isn't it a bit dodgy
> allocating blindly out of the kmem_cache? It means we don't have a redzone
> or a guard page, so if you can trigger something like a recursion bug then
> could you scribble past the SCS before the main stack overflows? Would this
> clobber somebody else's SCS?

I agree that allocating from a kmem_cache isn't ideal for safety. It's a
compromise to reduce memory overhead.

> The vmap version that I asked Sami to drop
> is at least better in this regard, although the guard page is at the wrong
> end of the stack and we just hope that the allocation below us didn't pass
> VM_NO_GUARD. Looks like the same story for vmap stack :/

SCS grows up and the guard page is after the allocation, so how is it at
the wrong end? Am I missing something here?

> If we split the pointer in two (base, offset) then we could leave the
> base live in the thread_info, not require alignment of the stacks (which
> may allow for unconditional redzoning?) and then just update the offset
> value on context switch, which could be trivially checked as part of the
> existing stack overflow checking on kernel entry.

I sent out v13 with split pointers, but I'm not sure it's convenient to
add an overflow check to kernel_ventry where the VMAP_STACK check is
done. I suppose I could add a check to kernel_entry after we load x18
from tsk. Thoughts?

Sami
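
For reference, the split representation in v13 ends up looking roughly like
this in thread_info (a sketch inferred from the task_scs()/task_scs_offset()
accessors that appear later in this thread; the exact layout in
arch/arm64/include/asm/thread_info.h may differ):

struct thread_info {
        /* ... existing members ... */
#ifdef CONFIG_SHADOW_CALL_STACK
        void            *scs_base;      /* base address of the shadow stack */
        unsigned long   scs_offset;     /* current offset from the base */
#endif
};

so a check on kernel entry would only need to verify that scs_offset is still
below SCS_SIZE.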

2020-04-27 20:52:39

by Ard Biesheuvel

[permalink] [raw]
Subject: Re: [PATCH v13 00/12] add support for Clang's Shadow Call Stack

On Mon, 27 Apr 2020 at 19:39, Ard Biesheuvel <[email protected]> wrote:
>
> On Mon, 27 Apr 2020 at 18:00, Sami Tolvanen <[email protected]> wrote:
> >
> > This patch series adds support for Clang's Shadow Call Stack
> > (SCS) mitigation, which uses a separately allocated shadow stack
> > to protect against return address overwrites. More information
> > can be found here:
> >
> > https://clang.llvm.org/docs/ShadowCallStack.html
> >
> > SCS provides better protection against traditional buffer
> > overflows than CONFIG_STACKPROTECTOR_*, but it should be noted
> > that SCS security guarantees in the kernel differ from the ones
> > documented for user space. The kernel must store addresses of
> > shadow stacks in memory, which means an attacker capable of
> > reading and writing arbitrary memory may be able to locate them
> > and hijack control flow by modifying the shadow stacks.
> >
> > SCS is currently supported only on arm64, where the compiler
> > requires the x18 register to be reserved for holding the current
> > task's shadow stack pointer.
> >
> > With -fsanitize=shadow-call-stack, the compiler injects
> > instructions to all non-leaf C functions to store the return
> > address to the shadow stack, and unconditionally load it again
> > before returning. As a result, SCS is incompatible with features
> > that rely on modifying function return addresses in the kernel
> > stack to alter control flow. A copy of the return address is
> > still kept in the kernel stack for compatibility with stack
> > unwinding, for example.
> >
> > SCS has a minimal performance overhead, but allocating
> > shadow stacks increases kernel memory usage. The feature is
> > therefore mostly useful on hardware that lacks support for PAC
> > instructions.
> >
> > Changes in v13:
> > - Changed thread_info::shadow_call_stack to a base address and
> > an offset instead, and removed the now unneeded __scs_base()
> > and scs_save().
> > - Removed alignment from the kmem_cache and static allocations.
> > - Removed the task_set_scs() helper function.
> > - Moved the assembly code for loading and storing the offset in
> > thread_info to scs_load/save macros.
> > - Added offset checking to scs_corrupted().
> > - Switched to cmpxchg_relaxed() in scs_check_usage().
> >
>
> OK, so one thing that came up in an offline discussion about SCS is
> the way it interacts with the vmap'ed stack.
>
> The vmap'ed stack is great for robustness, but it only works if things
> don't explode for other reasons in the mean time. This means the
> ordinary-to-shadow-call-stack size ratio should be chosen such that it
> is *really* unlikely you could ever overflow the shadow call stack and
> corrupt another task's call stack before hitting the vmap stack's
> guard region.
>
> Alternatively, I wonder if there is a way we could let the SCS and
> ordinary stack share [the bottom of] the vmap'ed region. That would
> give rather nasty results if the ordinary stack overflows into the
> SCS, but for cases where we really recurse out of control, we could
> catch this occurrence on either stack, whichever one occurs first. And
> the nastiness -when it does occur- will not corrupt any state beyond
> the stack of the current task.

Hmm, I guess that would make it quite hard to keep the SCS address
secret though :-(

2020-04-27 22:11:32

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v13 00/12] add support for Clang's Shadow Call Stack

On Mon, Apr 27, 2020 at 10:50:34PM +0200, Ard Biesheuvel wrote:
> > OK, so one thing that came up in an offline discussion about SCS is
> > the way it interacts with the vmap'ed stack.
> >
> > The vmap'ed stack is great for robustness, but it only works if things
> > don't explode for other reasons in the mean time. This means the
> > ordinary-to-shadow-call-stack size ratio should be chosen such that it
> > is *really* unlikely you could ever overflow the shadow call stack and
> > corrupt another task's call stack before hitting the vmap stack's
> > guard region.
> >
> > Alternatively, I wonder if there is a way we could let the SCS and
> > ordinary stack share [the bottom of] the vmap'ed region. That would
> > give rather nasty results if the ordinary stack overflows into the
> > SCS, but for cases where we really recurse out of control, we could
> > catch this occurrence on either stack, whichever one occurs first. And
> > the nastiness -when it does occur- will not corrupt any state beyond
> > the stack of the current task.
>
> Hmm, I guess that would make it quite hard to keep the SCS address
> secret though :-(

Yes, and the stack potentially overflowing into the SCS sort of defeats
the purpose. I'm fine with increasing the SCS size to something safer,
but using a vmapped shadow stack seems like the correct solution to this
problem, at least on devices where allocating a full page isn't an issue.
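
For concreteness, the vmapped variant under discussion would presumably look
something like the sketch below (not code from this series; the function name
is made up, and it needs <linux/vmalloc.h>):

static void *scs_alloc_vmapped(int node)
{
        /*
         * vmalloc rounds the allocation up to a full page and places a
         * guard page after it, so an overflowing shadow stack faults
         * instead of scribbling over a neighbouring allocation.
         */
        return __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
                                    GFP_SCS, PAGE_KERNEL, 0, node,
                                    __builtin_return_address(0));
}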

Sami

2020-04-29 08:44:13

by David Laight

[permalink] [raw]
Subject: RE: [PATCH v13 00/12] add support for Clang's Shadow Call Stack

From: Sami Tolvanen
> Sent: 27 April 2020 23:10
...
> > > Alternatively, I wonder if there is a way we could let the SCS and
> > > ordinary stack share [the bottom of] the vmap'ed region. That would
> > > give rather nasty results if the ordinary stack overflows into the
> > > SCS, but for cases where we really recurse out of control, we could
> > > catch this occurrence on either stack, whichever one occurs first. And
> > > the nastiness -when it does occur- will not corrupt any state beyond
> > > the stack of the current task.
> >
> > Hmm, I guess that would make it quite hard to keep the SCS address
> > secret though :-(
>
> Yes, and the stack potentially overflowing into the SCS sort of defeats
> the purpose. I'm fine with increasing the SCS size to something safer,
> but using a vmapped shadow stack seems like the correct solution to this
> problem, at least on devices where allocating a full page isn't an issue.

Wouldn't you do it the other way around - so shadow stack overflow
corrupts the bottom of the normal stack?
That can be detected 'after the fact' in a few places (e.g. process
switch and return to user).

Actually you might want to do syscall entry at the base of the stack area,
then (effectively) allocate an on-stack buffer for the shadow stack.

I'd have thought that kernel code could get the shadow stack address
just by reading x18?
Userspace isn't supposed to be able to get the main kernel stack
address either.

David


2020-05-04 16:55:53

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, Apr 27, 2020 at 01:45:46PM -0700, Sami Tolvanen wrote:
> On Fri, Apr 24, 2020 at 12:21:14PM +0100, Will Deacon wrote:
> > Also, since you mentioned the lack of redzoning, isn't it a bit dodgy
> > allocating blindly out of the kmem_cache? It means we don't have a redzone
> > or a guard page, so if you can trigger something like a recursion bug then
> > could you scribble past the SCS before the main stack overflows? Would this
> > clobber somebody else's SCS?
>
> I agree that allocating from a kmem_cache isn't ideal for safety. It's a
> compromise to reduce memory overhead.

Do you think it would be a problem if we always allocated a page for the
SCS?

> > The vmap version that I asked Sami to drop
> > is at least better in this regard, although the guard page is at the wrong
> > end of the stack and we just hope that the allocation below us didn't pass
> > VM_NO_GUARD. Looks like the same story for vmap stack :/
>
> SCS grows up and the guard page is after the allocation, so how is it at
> the wrong end? Am I missing something here?

Sorry, I'd got the SCS upside-down in my head (hey, that second 'S' stands
for 'Stack'!). But I think I'm right about vmap stack, which feels a
little fragile even though it seems to work out today with the very limited
uses of VM_NO_GUARD.

> > If we split the pointer in two (base, offset) then we could leave the
> > base live in the thread_info, not require alignment of the stacks (which
> > may allow for unconditional redzoning?) and then just update the offset
> > value on context switch, which could be trivially checked as part of the
> > existing stack overflow checking on kernel entry.
>
> I sent out v13 with split pointers, but I'm not sure it's convenient to
> add an overflow check to kernel_ventry where the VMAP_STACK check is
> done. I suppose I could add a check to kernel_entry after we load x18
> from tsk. Thoughts?

I'll take a look at v13, since at this stage I'm keen to get something
queued up so that we can use it as a base for further improvements without
you having to repost the whole stack every time.

Cheers,

Will

2020-05-04 18:09:11

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, May 04, 2020 at 05:52:28PM +0100, Will Deacon wrote:
> On Mon, Apr 27, 2020 at 01:45:46PM -0700, Sami Tolvanen wrote:
> > On Fri, Apr 24, 2020 at 12:21:14PM +0100, Will Deacon wrote:
> > > The vmap version that I asked Sami to drop
> > > is at least better in this regard, although the guard page is at the wrong
> > > end of the stack and we just hope that the allocation below us didn't pass
> > > VM_NO_GUARD. Looks like the same story for vmap stack :/
> >
> > SCS grows up and the guard page is after the allocation, so how is it at
> > the wrong end? Am I missing something here?
>
> Sorry, I'd got the SCS upside-down in my head (hey, that second 'S' stands
> for 'Stack'!). But I think I'm right about vmap stack, which feels a
> little fragile even though it seems to work out today with the very limited
> uses of VM_NO_GUARD.

Yeah, when VMAP_STACK was originally being developed, IIRC, there was
an effort made to eliminate all the users of VM_NO_GUARD, and it looks
like it's mostly there. Really the only use left is arm64's kernel image
mapping routines, and then it's not actually used in the traditional
sense -- it's just a boolean for whether to toss in a guard page at the
end of the data section, and the VMAs are built manually. I think that
code could actually be refactored to drop it too and then the only user
would be KASAN, which, IIUC, wants to build consecutive vmap areas.

--
Kees Cook

2020-05-04 18:47:11

by Sami Tolvanen

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, May 04, 2020 at 05:52:28PM +0100, Will Deacon wrote:
> On Mon, Apr 27, 2020 at 01:45:46PM -0700, Sami Tolvanen wrote:
> > I agree that allocating from a kmem_cache isn't ideal for safety. It's a
> > compromise to reduce memory overhead.
>
> Do you think it would be a problem if we always allocated a page for the
> SCS?

Yes, the memory overhead was deemed too large for Android devices, which
have thousands of threads running.
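
To put a number on that (an illustrative calculation, assuming 4 KB pages):
10,000 threads at one page each is roughly 40 MB of shadow stacks, versus
roughly 10 MB with the 1 KB kmem_cache allocation.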

Sami

2020-05-04 18:49:42

by Jann Horn

[permalink] [raw]
Subject: Re: [PATCH v11 01/12] add support for Clang's Shadow Call Stack (SCS)

On Mon, May 4, 2020 at 6:52 PM Will Deacon <[email protected]> wrote:
> On Mon, Apr 27, 2020 at 01:45:46PM -0700, Sami Tolvanen wrote:
> > On Fri, Apr 24, 2020 at 12:21:14PM +0100, Will Deacon wrote:
> > > Also, since you mentioned the lack of redzoning, isn't it a bit dodgy
> > > allocating blindly out of the kmem_cache? It means we don't have a redzone
> > > or a guard page, so if you can trigger something like a recursion bug then
> > > could you scribble past the SCS before the main stack overflows? Would this
> > > clobber somebody else's SCS?
> >
> > I agree that allocating from a kmem_cache isn't ideal for safety. It's a
> > compromise to reduce memory overhead.
>
> Do you think it would be a problem if we always allocated a page for the
> SCS?

I guess doing this safely and without wasting a page per task would
only be possible in an elegant way once MTE lands on devices?

I wonder how bad context switch latency would be if the actual SCS was
percpu and vmapped (starting at an offset inside the page such that
the SCS can only grow up to something like 0x400 bytes before
panicking the CPU) and the context switch path saved/restored the used
part of the vmapped SCS into a smaller allocation from the slab
allocator... presumably the SCS will usually just be something like
one cacheline big? That probably only costs a moderate amount of time
to copy...
Or as an extension of that, if the SCS copying turns out to be too
costly, there could be a percpu LRU cache consisting of vmapped SCS
pages, and whenever a task gets scheduled that doesn't have a vmapped
SCS, it "swaps out" the contents of the least recently used vmapped
SCS into the corresponding task's slab SCS, and "swaps in" from its
own slab SCS into the vmapped SCS. And task migration would force
"swapping out".

Not sure if this is a good idea, or if I'm just making things worse by
suggesting extra complexity...

2020-05-15 17:26:10

by Will Deacon

[permalink] [raw]
Subject: Re: [PATCH v13 00/12] add support for Clang's Shadow Call Stack

Hi Sami,

On Mon, Apr 27, 2020 at 09:00:06AM -0700, Sami Tolvanen wrote:
> This patch series adds support for Clang's Shadow Call Stack
> (SCS) mitigation, which uses a separately allocated shadow stack
> to protect against return address overwrites. More information
> can be found here:
>
> https://clang.llvm.org/docs/ShadowCallStack.html

I'm planning to queue this with the (mostly cosmetic) diff below folded in.
I also have some extra patches on top which I'll send out shortly for
review.

However, I really think we need to get to the bottom of the size issue
since I'm highly sceptical about not being able to afford a full page
for the shadow stack allocation. We can change this later so it needn't
hold up the patchset, but given that Android is the only user, I'd like
to make sure that if we change to use a full page upstream then that is
also acceptable in AOSP.

Thanks,

Will

--->8

diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h
index 18fc4d29ef27..790c0c6b8552 100644
--- a/include/linux/compiler-clang.h
+++ b/include/linux/compiler-clang.h
@@ -45,6 +45,4 @@

#if __has_feature(shadow_call_stack)
# define __noscs __attribute__((__no_sanitize__("shadow-call-stack")))
-#else
-# define __noscs
#endif
diff --git a/include/linux/scs.h b/include/linux/scs.h
index 060eeb3d1390..3f3662621a27 100644
--- a/include/linux/scs.h
+++ b/include/linux/scs.h
@@ -11,7 +11,7 @@
#include <linux/gfp.h>
#include <linux/poison.h>
#include <linux/sched.h>
-#include <asm/page.h>
+#include <linux/sizes.h>

#ifdef CONFIG_SHADOW_CALL_STACK

@@ -20,7 +20,7 @@
* architecture) provided ~40% safety margin on stack usage while keeping
* memory allocation overhead reasonable.
*/
-#define SCS_SIZE 1024UL
+#define SCS_SIZE SZ_1K
#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)

/* An illegal pointer value to mark the end of the shadow stack. */
@@ -29,7 +29,9 @@
#define task_scs(tsk) (task_thread_info(tsk)->scs_base)
#define task_scs_offset(tsk) (task_thread_info(tsk)->scs_offset)

-extern void scs_init(void);
+void scs_init(void);
+int scs_prepare(struct task_struct *tsk, int node);
+void scs_release(struct task_struct *tsk);

static inline void scs_task_reset(struct task_struct *tsk)
{
@@ -40,8 +42,6 @@ static inline void scs_task_reset(struct task_struct *tsk)
task_scs_offset(tsk) = 0;
}

-extern int scs_prepare(struct task_struct *tsk, int node);
-
static inline unsigned long *__scs_magic(void *s)
{
return (unsigned long *)(s + SCS_SIZE) - 1;
@@ -55,12 +55,8 @@ static inline bool scs_corrupted(struct task_struct *tsk)
READ_ONCE_NOCHECK(*magic) != SCS_END_MAGIC);
}

-extern void scs_release(struct task_struct *tsk);
-
#else /* CONFIG_SHADOW_CALL_STACK */

-#define task_scs(tsk) NULL
-
static inline void scs_init(void) {}
static inline void scs_task_reset(struct task_struct *tsk) {}
static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
diff --git a/kernel/scs.c b/kernel/scs.c
index 2a96573f2b1b..9389c28f0853 100644
--- a/kernel/scs.c
+++ b/kernel/scs.c
@@ -55,45 +55,37 @@ static void scs_account(struct task_struct *tsk, int account)

int scs_prepare(struct task_struct *tsk, int node)
{
- void *s;
+ void *s = scs_alloc(node);

- s = scs_alloc(node);
if (!s)
return -ENOMEM;

task_scs(tsk) = s;
task_scs_offset(tsk) = 0;
scs_account(tsk, 1);
-
return 0;
}

-#ifdef CONFIG_DEBUG_STACK_USAGE
-static unsigned long __scs_used(struct task_struct *tsk)
+static void scs_check_usage(struct task_struct *tsk)
{
- unsigned long *p = task_scs(tsk);
- unsigned long *end = __scs_magic(p);
- unsigned long s = (unsigned long)p;
+ static unsigned long highest;

- while (p < end && READ_ONCE_NOCHECK(*p))
- p++;
+ unsigned long *p, prev, curr = highest, used = 0;

- return (unsigned long)p - s;
-}
+ if (!IS_ENABLED(CONFIG_DEBUG_STACK_USAGE))
+ return;

-static void scs_check_usage(struct task_struct *tsk)
-{
- static unsigned long highest;
- unsigned long used = __scs_used(tsk);
- unsigned long prev;
- unsigned long curr = highest;
+ for (p = task_scs(tsk); p < __scs_magic(task_scs(tsk)); ++p) {
+ if (!READ_ONCE_NOCHECK(*p))
+ break;
+ used += sizeof(*p);
+ }

while (used > curr) {
prev = cmpxchg_relaxed(&highest, curr, used);

if (prev == curr) {
- pr_info("%s (%d): highest shadow stack usage: "
- "%lu bytes\n",
+ pr_info("%s (%d): highest shadow stack usage: %lu bytes\n",
tsk->comm, task_pid_nr(tsk), used);
break;
}
@@ -101,21 +93,16 @@ static void scs_check_usage(struct task_struct *tsk)
curr = prev;
}
}
-#else
-static inline void scs_check_usage(struct task_struct *tsk) {}
-#endif

void scs_release(struct task_struct *tsk)
{
- void *s;
+ void *s = task_scs(tsk);

- s = task_scs(tsk);
if (!s)
return;

- WARN_ON(scs_corrupted(tsk));
+ WARN(scs_corrupted(tsk), "corrupted shadow stack detected when freeing task\n");
scs_check_usage(tsk);
-
scs_account(tsk, -1);
scs_free(s);
}