This is the arm64 version of the STACKLEAK plugin originally from
grsecurity. See
https://marc.info/?l=kernel-hardening&m=151880470609808 for the
full x86 version. This is based on top of Kees' branch for stackleak
and has been cleaned up to use a few macros from that branch.
Comments welcome, if there are no major objections Kees will queue this
up to get some CI testing. This passed both of the LKDTM tests.
Laura Abbott (2):
stackleak: Update for arm64
arm64: Clear the stack
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/processor.h | 6 ++
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/entry.S | 108 +++++++++++++++++++++++++++++++++
arch/arm64/kernel/process.c | 16 +++++
drivers/firmware/efi/libstub/Makefile | 3 +-
scripts/Makefile.gcc-plugins | 5 +-
scripts/gcc-plugins/stackleak_plugin.c | 5 ++
8 files changed, 145 insertions(+), 2 deletions(-)
--
2.14.3
arm64 has another layer of indirection in the RTL.
Account for this in the plugin.
Signed-off-by: Laura Abbott <[email protected]>
---
scripts/gcc-plugins/stackleak_plugin.c | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
index 6fc991c98d8b..7dfaa027423f 100644
--- a/scripts/gcc-plugins/stackleak_plugin.c
+++ b/scripts/gcc-plugins/stackleak_plugin.c
@@ -244,6 +244,11 @@ static unsigned int stackleak_final_execute(void)
* that insn.
*/
body = PATTERN(insn);
+ /* arm64 is different */
+ if (GET_CODE(body) == PARALLEL) {
+ body = XEXP(body, 0);
+ body = XEXP(body, 0);
+ }
if (GET_CODE(body) != CALL)
continue;
--
2.14.3
Implementation of stackleak based heavily on the x86 version
Signed-off-by: Laura Abbott <[email protected]>
---
arch/arm64/Kconfig | 1 +
arch/arm64/include/asm/processor.h | 6 ++
arch/arm64/kernel/asm-offsets.c | 3 +
arch/arm64/kernel/entry.S | 108 ++++++++++++++++++++++++++++++++++
arch/arm64/kernel/process.c | 16 +++++
drivers/firmware/efi/libstub/Makefile | 3 +-
scripts/Makefile.gcc-plugins | 5 +-
7 files changed, 140 insertions(+), 2 deletions(-)
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7381eeb7ef8e..dcadcae674a7 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -92,6 +92,7 @@ config ARM64
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_SECCOMP_FILTER
+ select HAVE_ARCH_STACKLEAK
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index fce604e3e599..4b309101ac83 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -114,6 +114,12 @@ struct thread_struct {
unsigned long fault_address; /* fault info */
unsigned long fault_code; /* ESR_EL1 value */
struct debug_info debug; /* debugging */
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ unsigned long lowest_stack;
+#ifdef CONFIG_STACKLEAK_METRICS
+ unsigned long prev_lowest_stack;
+#endif
+#endif
};
/*
diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
index 1303e04110cd..b5c6100e8b14 100644
--- a/arch/arm64/kernel/asm-offsets.c
+++ b/arch/arm64/kernel/asm-offsets.c
@@ -45,6 +45,9 @@ int main(void)
DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0));
#endif
DEFINE(TSK_STACK, offsetof(struct task_struct, stack));
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ DEFINE(TSK_TI_LOWEST_STACK, offsetof(struct task_struct, thread.lowest_stack));
+#endif
BLANK();
DEFINE(THREAD_CPU_CONTEXT, offsetof(struct task_struct, thread.cpu_context));
BLANK();
diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
index ec2ee720e33e..b909b436293a 100644
--- a/arch/arm64/kernel/entry.S
+++ b/arch/arm64/kernel/entry.S
@@ -401,6 +401,11 @@ tsk .req x28 // current thread_info
.text
+ .macro erase_kstack
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ bl __erase_kstack
+#endif
+ .endm
/*
* Exception vectors.
*/
@@ -901,6 +906,7 @@ work_pending:
*/
ret_to_user:
disable_daif
+ erase_kstack
ldr x1, [tsk, #TSK_TI_FLAGS]
and x2, x1, #_TIF_WORK_MASK
cbnz x2, work_pending
@@ -1337,3 +1343,105 @@ alternative_else_nop_endif
ENDPROC(__sdei_asm_handler)
NOKPROBE(__sdei_asm_handler)
#endif /* CONFIG_ARM_SDE_INTERFACE */
+
+/*
+ * This is what the stack looks like
+ *
+ * +---+ <- task_stack_page(p) + THREAD_SIZE
+ * | |
+ * +---+ <- task_stack_page(p) + THREAD_START_SP
+ * | |
+ * | |
+ * +---+ <- task_pt_regs(p)
+ * | |
+ * | |
+ * | | <- current_sp
+ * ~~~~~
+ *
+ * ~~~~~
+ * | | <- lowest_stack
+ * | |
+ * | |
+ * +---+ <- task_stack_page(p)
+ *
+ * This function is designed to poison the memory between the lowest_stack
+ * and the current stack pointer. After clearing the stack, the lowest
+ * stack is reset.
+ */
+
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ENTRY(__erase_kstack)
+ mov x10, x0 // save x0 for the fast path
+
+ get_thread_info x0
+ ldr x1, [x0, #TSK_TI_LOWEST_STACK]
+
+ /* get the number of bytes to check for lowest stack */
+ mov x3, x1
+ and x3, x3, #THREAD_SIZE - 1
+ lsr x3, x3, #3
+
+ /* generate addresses from the bottom of the stack */
+ mov x4, sp
+ movn x2, #THREAD_SIZE - 1
+ and x1, x4, x2
+
+ mov x2, #STACKLEAK_POISON
+
+ mov x5, #0
+1:
+ /*
+ * As borrowed from the x86 logic, start from the lowest_stack
+ * and go to the bottom to find the poison value.
+ * The check of 16 is to hopefully avoid false positives.
+ */
+ cbz x3, 4f
+ ldr x4, [x1, x3, lsl #3]
+ cmp x4, x2
+ csinc x5, xzr, x5, ne
+ tbnz x5, #STACKLEAK_POISON_CHECK_DEPTH/4, 4f // found 16 poisons?
+ sub x3, x3, #1
+ b 1b
+
+4:
+ /* total number of bytes to poison */
+ add x5, x1, x3, lsl #3
+ mov x4, sp
+ sub x8, x4, x5
+
+ cmp x8, #THREAD_SIZE // sanity check the range
+ b.lo 5f
+ ASM_BUG()
+
+5:
+ /*
+ * We may have hit a path where the stack did not get used,
+ * no need to do anything here
+ */
+ cbz x8, 7f
+
+ sub x8, x8, #1 // don't poison the current stack pointer
+
+ lsr x8, x8, #3
+ add x3, x3, x8
+
+ /*
+	 * The logic of this loop ensures the last stack word isn't
+	 * overwritten.
+ */
+6:
+ cbz x8, 7f
+ str x2, [x1, x3, lsl #3]
+ sub x3, x3, #1
+ sub x8, x8, #1
+ b 6b
+
+ /* Reset the lowest stack to the top of the stack */
+7:
+ mov x1, sp
+ str x1, [x0, #TSK_TI_LOWEST_STACK]
+
+ mov x0, x10
+ ret
+ENDPROC(__erase_kstack)
+#endif
diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
index ad8aeb098b31..fd0528db6772 100644
--- a/arch/arm64/kernel/process.c
+++ b/arch/arm64/kernel/process.c
@@ -357,6 +357,9 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
p->thread.cpu_context.pc = (unsigned long)ret_from_fork;
p->thread.cpu_context.sp = (unsigned long)childregs;
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ p->thread.lowest_stack = (unsigned long)task_stack_page(p);
+#endif
ptrace_hw_copy_thread(p);
return 0;
@@ -486,3 +489,16 @@ void arch_setup_new_exec(void)
{
current->mm->context.flags = is_compat_task() ? MMCF_AARCH32 : 0;
}
+
+#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+void __used check_alloca(unsigned long size)
+{
+ unsigned long sp, stack_left;
+
+ sp = current_stack_pointer;
+
+ stack_left = sp & (THREAD_SIZE - 1);
+ BUG_ON(stack_left < 256 || size >= stack_left - 256);
+}
+EXPORT_SYMBOL(check_alloca);
+#endif
diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
index 7b3ba40f0745..35ebbc1b17ff 100644
--- a/drivers/firmware/efi/libstub/Makefile
+++ b/drivers/firmware/efi/libstub/Makefile
@@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt
KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
-D__NO_FORTIFY \
$(call cc-option,-ffreestanding) \
- $(call cc-option,-fno-stack-protector)
+ $(call cc-option,-fno-stack-protector) \
+ $(DISABLE_STACKLEAK_PLUGIN)
GCOV_PROFILE := n
KASAN_SANITIZE := n
diff --git a/scripts/Makefile.gcc-plugins b/scripts/Makefile.gcc-plugins
index 8d6070fc538f..6cc0e35d3324 100644
--- a/scripts/Makefile.gcc-plugins
+++ b/scripts/Makefile.gcc-plugins
@@ -37,11 +37,14 @@ ifdef CONFIG_GCC_PLUGINS
gcc-plugin-$(CONFIG_GCC_PLUGIN_STACKLEAK) += stackleak_plugin.so
gcc-plugin-cflags-$(CONFIG_GCC_PLUGIN_STACKLEAK) += -DSTACKLEAK_PLUGIN -fplugin-arg-stackleak_plugin-track-min-size=$(CONFIG_STACKLEAK_TRACK_MIN_SIZE)
+ ifdef CONFIG_GCC_PLUGIN_STACKLEAK
+ DISABLE_STACKLEAK_PLUGIN += -fplugin-arg-stackleak_plugin-disable
+ endif
GCC_PLUGINS_CFLAGS := $(strip $(addprefix -fplugin=$(objtree)/scripts/gcc-plugins/, $(gcc-plugin-y)) $(gcc-plugin-cflags-y))
export PLUGINCC GCC_PLUGINS_CFLAGS GCC_PLUGIN GCC_PLUGIN_SUBDIR
- export SANCOV_PLUGIN DISABLE_LATENT_ENTROPY_PLUGIN
+ export SANCOV_PLUGIN DISABLE_LATENT_ENTROPY_PLUGIN DISABLE_STACKLEAK_PLUGIN
ifneq ($(PLUGINCC),)
# SANCOV_PLUGIN can be only in CFLAGS_KCOV because avoid duplication.
--
2.14.3
On 21.02.2018 04:13, Laura Abbott wrote:
> This is the arm64 version of the STACKLEAK plugin originall from
> grsecurity. See
> https://marc.info/?l=kernel-hardening&m=151880470609808 for the
> full x86 version. This is based on top of Kees' branch for stackleak
> and has been cleaned up to use a few macros from that branch.
>
> Comments welcome, if there are no major objections Kees will queue this
> up to get some CI testing. This passed both of the LKDTM tests.
Hello, Laura,
Thank you. I'll take some time to learn your patches and test them on my LeMaker
HiKey board. I'll return with the feedback.
Best regards,
Alexander
Hi Laura,
On Tue, Feb 20, 2018 at 05:13:03PM -0800, Laura Abbott wrote:
> Implementation of stackleak based heavily on the x86 version
Neat!
> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
> index ec2ee720e33e..b909b436293a 100644
> --- a/arch/arm64/kernel/entry.S
> +++ b/arch/arm64/kernel/entry.S
> @@ -401,6 +401,11 @@ tsk .req x28 // current thread_info
>
> .text
>
> + .macro erase_kstack
> +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
> + bl __erase_kstack
> +#endif
> + .endm
> /*
> * Exception vectors.
> */
> @@ -901,6 +906,7 @@ work_pending:
> */
> ret_to_user:
> disable_daif
> + erase_kstack
I *think* this should happen in finish_ret_to_user a few lines down, since we
can call C code if we branch to work_pending, dirtying the stack.
> ldr x1, [tsk, #TSK_TI_FLAGS]
> and x2, x1, #_TIF_WORK_MASK
> cbnz x2, work_pending
> @@ -1337,3 +1343,105 @@ alternative_else_nop_endif
> ENDPROC(__sdei_asm_handler)
> NOKPROBE(__sdei_asm_handler)
> #endif /* CONFIG_ARM_SDE_INTERFACE */
> +
> +/*
> + * This is what the stack looks like
> + *
> + * +---+ <- task_stack_page(p) + THREAD_SIZE
> + * | |
> + * +---+ <- task_stack_page(p) + THREAD_START_SP
> + * | |
> + * | |
> + * +---+ <- task_pt_regs(p)
THREAD_START_SP got killed off in commit 34be98f4944f9907 as part of the
VMAP_STACK rework, so this can be:
+---+ <- task_stack_page(p) + THREAD_SIZE
| |
| |
+---+ <- task_pt_regs(p)
...
> + * | |
> + * | |
> + * | | <- current_sp
> + * ~~~~~
> + *
> + * ~~~~~
> + * | | <- lowest_stack
> + * | |
> + * | |
> + * +---+ <- task_stack_page(p)
> + *
> + * This function is desgned to poison the memory between the lowest_stack
> + * and the current stack pointer. After clearing the stack, the lowest
> + * stack is reset.
> + */
> +
> +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
> +ENTRY(__erase_kstack)
> + mov x10, x0 // save x0 for the fast path
AFAICT, we only call this from ret_to_user, where x0 doesn't need to be
preserved.
Is that for ret_fast_syscall? In some cases, ret_fast_syscall can bypass
ret_to_user and calls kernel_exit directly, so we might need a call there.
> +
> + get_thread_info x0
> + ldr x1, [x0, #TSK_TI_LOWEST_STACK]
> +
> + /* get the number of bytes to check for lowest stack */
> + mov x3, x1
> + and x3, x3, #THREAD_SIZE - 1
> + lsr x3, x3, #3
> +
> + /* generate addresses from the bottom of the stack */
> + mov x4, sp
> + movn x2, #THREAD_SIZE - 1
> + and x1, x4, x2
Can we replace the MOVN;AND with a single instruction to clear the low bits?
e.g.
mov x4, sp
bic x1, x4, #THREAD_SIZE - 1
... IIUC BIC is an alias for the bitfield instructions, though I can't recall
exactly which one(s).
> +
> + mov x2, #STACKLEAK_POISON
> +
> + mov x5, #0
> +1:
> + /*
> + * As borrowed from the x86 logic, start from the lowest_stack
> + * and go to the bottom to find the poison value.
> + * The check of 16 is to hopefully avoid false positives.
> + */
> + cbz x3, 4f
> + ldr x4, [x1, x3, lsl #3]
> + cmp x4, x2
> + csinc x5, xzr, x5, ne
> + tbnz x5, #STACKLEAK_POISON_CHECK_DEPTH/4, 4f // found 16 poisons?
> + sub x3, x3, #1
> + b 1b
> +
> +4:
> + /* total number of bytes to poison */
> + add x5, x1, x3, lsl #3
> + mov x4, sp
> + sub x8, x4, x5
> +
> + cmp x8, #THREAD_SIZE // sanity check the range
> + b.lo 5f
> + ASM_BUG()
> +
> +5:
> + /*
> + * We may have hit a path where the stack did not get used,
> + * no need to do anything here
> + */
> + cbz x8, 7f
> +
> + sub x8, x8, #1 // don't poison the current stack pointer
> +
> + lsr x8, x8, #3
> + add x3, x3, x8
> +
> + /*
> + * The logic of this loop ensures the last stack word isn't
> + * ovewritten.
> + */
Is that to ensure that we don't clobber the word at the current sp value?
> +6:
> + cbz x8, 7f
> + str x2, [x1, x3, lsl #3]
> + sub x3, x3, #1
> + sub x8, x8, #1
> + b 6b
> +
> + /* Reset the lowest stack to the top of the stack */
> +7:
> + mov x1, sp
> + str x1, [x0, #TSK_TI_LOWEST_STACK]
> +
> + mov x0, x10
> + ret
> +ENDPROC(__erase_kstack)
> +#endif
[...]
> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
> index 7b3ba40f0745..35ebbc1b17ff 100644
> --- a/drivers/firmware/efi/libstub/Makefile
> +++ b/drivers/firmware/efi/libstub/Makefile
> @@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt
> KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
> -D__NO_FORTIFY \
> $(call cc-option,-ffreestanding) \
> - $(call cc-option,-fno-stack-protector)
> + $(call cc-option,-fno-stack-protector) \
> + $(DISABLE_STACKLEAK_PLUGIN)
I believe the KVM hyp code will also need to opt-out of this.
Thanks,
Mark.
On 02/21/2018 07:38 AM, Mark Rutland wrote:
> Hi Laura,
>
> On Tue, Feb 20, 2018 at 05:13:03PM -0800, Laura Abbott wrote:
>> Implementation of stackleak based heavily on the x86 version
>
> Neat!
>
>> diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S
>> index ec2ee720e33e..b909b436293a 100644
>> --- a/arch/arm64/kernel/entry.S
>> +++ b/arch/arm64/kernel/entry.S
>> @@ -401,6 +401,11 @@ tsk .req x28 // current thread_info
>>
>> .text
>>
>> + .macro erase_kstack
>> +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
>> + bl __erase_kstack
>> +#endif
>> + .endm
>> /*
>> * Exception vectors.
>> */
>> @@ -901,6 +906,7 @@ work_pending:
>> */
>> ret_to_user:
>> disable_daif
>> + erase_kstack
>
> I *think* this should happen in finish_ret_to_user a few lines down, since we
> can call C code if we branch to work_pending, dirtying the stack.
>
I think you're right but this didn't immediately work when I tried it.
I'll have to dig into this some more.
>> ldr x1, [tsk, #TSK_TI_FLAGS]
>> and x2, x1, #_TIF_WORK_MASK
>> cbnz x2, work_pending
>> @@ -1337,3 +1343,105 @@ alternative_else_nop_endif
>> ENDPROC(__sdei_asm_handler)
>> NOKPROBE(__sdei_asm_handler)
>> #endif /* CONFIG_ARM_SDE_INTERFACE */
>> +
>> +/*
>> + * This is what the stack looks like
>> + *
>> + * +---+ <- task_stack_page(p) + THREAD_SIZE
>> + * | |
>> + * +---+ <- task_stack_page(p) + THREAD_START_SP
>> + * | |
>> + * | |
>> + * +---+ <- task_pt_regs(p)
>
> THREAD_START_SP got killed off in commit 34be98f4944f9907 as part of the
> VMAP_STACK rework, so this can be:
>
> +---+ <- task_stack_page(p) + THREAD_SIZE
> | |
> | |
> +---+ <- task_pt_regs(p)
> ...
>
Good point.
>> + * | |
>> + * | |
>> + * | | <- current_sp
>> + * ~~~~~
>> + *
>> + * ~~~~~
>> + * | | <- lowest_stack
>> + * | |
>> + * | |
>> + * +---+ <- task_stack_page(p)
>> + *
>> + * This function is desgned to poison the memory between the lowest_stack
>> + * and the current stack pointer. After clearing the stack, the lowest
>> + * stack is reset.
>> + */
>> +
>> +#ifdef CONFIG_GCC_PLUGIN_STACKLEAK
>> +ENTRY(__erase_kstack)
>> + mov x10, x0 // save x0 for the fast path
>
> AFAICT, we only call this from ret_to_user, where x0 doesn't need to be
> preserved.
>
> Is that for ret_fast_syscall? In some cases, ret_fast_syscall can bypass
> ret_to_user and calls kernel_exit directly, so we might need a call there.
>
This was a hold over when I was experimenting with calling erase_kstack
more places, one of which came through ret_fast_syscall. I realized
later that the erase was unnecessary but accidentally kept the saving
in. I'll see about removing it assuming we don't decide later to put
a call on the fast path.
>> +
>> + get_thread_info x0
>> + ldr x1, [x0, #TSK_TI_LOWEST_STACK]
>> +
>> + /* get the number of bytes to check for lowest stack */
>> + mov x3, x1
>> + and x3, x3, #THREAD_SIZE - 1
>> + lsr x3, x3, #3
>> +
>> + /* generate addresses from the bottom of the stack */
>> + mov x4, sp
>> + movn x2, #THREAD_SIZE - 1
>> + and x1, x4, x2
>
> Can we replace the MOVN;AND with a single instruction to clear the low bits?
> e.g.
>
> mov x4, sp
> bic x1, x4, #THREAD_SIZE - 1
>
> ... IIUC BIC is an alias for the bitfield instructions, though I can't recall
> exactly which one(s).
>
Good suggestion.
>> +
>> + mov x2, #STACKLEAK_POISON
>> +
>> + mov x5, #0
>> +1:
>> + /*
>> + * As borrowed from the x86 logic, start from the lowest_stack
>> + * and go to the bottom to find the poison value.
>> + * The check of 16 is to hopefully avoid false positives.
>> + */
>> + cbz x3, 4f
>> + ldr x4, [x1, x3, lsl #3]
>> + cmp x4, x2
>> + csinc x5, xzr, x5, ne
>> + tbnz x5, #STACKLEAK_POISON_CHECK_DEPTH/4, 4f // found 16 poisons?
>> + sub x3, x3, #1
>> + b 1b
>> +
>> +4:
>> + /* total number of bytes to poison */
>> + add x5, x1, x3, lsl #3
>> + mov x4, sp
>> + sub x8, x4, x5
>> +
>> + cmp x8, #THREAD_SIZE // sanity check the range
>> + b.lo 5f
>> + ASM_BUG()
>> +
>> +5:
>> + /*
>> + * We may have hit a path where the stack did not get used,
>> + * no need to do anything here
>> + */
>> + cbz x8, 7f
>> +
>> + sub x8, x8, #1 // don't poison the current stack pointer
>> +
>> + lsr x8, x8, #3
>> + add x3, x3, x8
>> +
>> + /*
>> + * The logic of this loop ensures the last stack word isn't
>> + * ovewritten.
>> + */
>
> Is that to ensure that we don't clobber the word at the current sp value?
>
Correct.
>> +6:
>> + cbz x8, 7f
>> + str x2, [x1, x3, lsl #3]
>> + sub x3, x3, #1
>> + sub x8, x8, #1
>> + b 6b
>> +
>> + /* Reset the lowest stack to the top of the stack */
>> +7:
>> + mov x1, sp
>> + str x1, [x0, #TSK_TI_LOWEST_STACK]
>> +
>> + mov x0, x10
>> + ret
>> +ENDPROC(__erase_kstack)
>> +#endif
>
> [...]
>
>> diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile
>> index 7b3ba40f0745..35ebbc1b17ff 100644
>> --- a/drivers/firmware/efi/libstub/Makefile
>> +++ b/drivers/firmware/efi/libstub/Makefile
>> @@ -20,7 +20,8 @@ cflags-$(CONFIG_EFI_ARMSTUB) += -I$(srctree)/scripts/dtc/libfdt
>> KBUILD_CFLAGS := $(cflags-y) -DDISABLE_BRANCH_PROFILING \
>> -D__NO_FORTIFY \
>> $(call cc-option,-ffreestanding) \
>> - $(call cc-option,-fno-stack-protector)
>> + $(call cc-option,-fno-stack-protector) \
>> + $(DISABLE_STACKLEAK_PLUGIN)
>
> I believe the KVM hyp code will also need to opt-out of this.
>
I'll double check that.
> Thanks,
> Mark.
>
Thanks,
Laura
On 02/21/2018 03:53 PM, Laura Abbott wrote:
>> I *think* this should happen in finish_ret_to_user a few lines down, since we
>> can call C code if we branch to work_pending, dirtying the stack.
>>
>
> I think you're right but this didn't immediately work when I tried it.
> I'll have to dig into this some more.
Okay I figured this out. Not corrupting registers works wonders.
Hi Laura,
On Tue, Feb 20, 2018 at 05:13:02PM -0800, Laura Abbott wrote:
>
> arm64 has another layer of indirection in the RTL.
> Account for this in the plugin.
>
> Signed-off-by: Laura Abbott <[email protected]>
> ---
> scripts/gcc-plugins/stackleak_plugin.c | 5 +++++
> 1 file changed, 5 insertions(+)
>
> diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
> index 6fc991c98d8b..7dfaa027423f 100644
> --- a/scripts/gcc-plugins/stackleak_plugin.c
> +++ b/scripts/gcc-plugins/stackleak_plugin.c
> @@ -244,6 +244,11 @@ static unsigned int stackleak_final_execute(void)
> * that insn.
> */
> body = PATTERN(insn);
> + /* arm64 is different */
> + if (GET_CODE(body) == PARALLEL) {
> + body = XEXP(body, 0);
> + body = XEXP(body, 0);
> + }
Like most kernel developers, I don't know the first thing about GCC internals
so I asked our GCC team and Richard (CC'd) reckons this should be:
if (GET_CODE(body) == PARALLEL)
body = XVECEXP(body, 0, 0);
instead of the hunk above. Can you give that a go instead, please?
Cheers,
Will
Hello Will, Richard and GCC folks!
On 22.02.2018 19:58, Will Deacon wrote:
> On Tue, Feb 20, 2018 at 05:13:02PM -0800, Laura Abbott wrote:
>>
>> arm64 has another layer of indirection in the RTL.
>> Account for this in the plugin.
>>
>> Signed-off-by: Laura Abbott <[email protected]>
>> ---
>> scripts/gcc-plugins/stackleak_plugin.c | 5 +++++
>> 1 file changed, 5 insertions(+)
>>
>> diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
>> index 6fc991c98d8b..7dfaa027423f 100644
>> --- a/scripts/gcc-plugins/stackleak_plugin.c
>> +++ b/scripts/gcc-plugins/stackleak_plugin.c
>> @@ -244,6 +244,11 @@ static unsigned int stackleak_final_execute(void)
>> * that insn.
>> */
>> body = PATTERN(insn);
>> + /* arm64 is different */
>> + if (GET_CODE(body) == PARALLEL) {
>> + body = XEXP(body, 0);
>> + body = XEXP(body, 0);
>> + }
>
> Like most kernel developers, I don't know the first thing about GCC internals
> so I asked our GCC team and Richard (CC'd) reckons this should be:
>
> if (GET_CODE(body) == PARALLEL)
> body = XVECEXP(body, 0, 0);
>
> instead of the hunk above. Can you give that a go instead, please?
Thanks a lot!
Would you be so kind to take a look at the whole STACKLEAK plugin?
http://www.openwall.com/lists/kernel-hardening/2018/02/16/4
https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git/commit/?h=kspp/gcc-plugin/stackleak&id=57a0a6763b12e82dd462593d0f42be610e93cdc9
It's not very big. I documented it in detail. I would be really grateful for the
review!
Best regards,
Alexander
On 02/22/2018 08:58 AM, Will Deacon wrote:
> Hi Laura,
>
> On Tue, Feb 20, 2018 at 05:13:02PM -0800, Laura Abbott wrote:
>>
>> arm64 has another layer of indirection in the RTL.
>> Account for this in the plugin.
>>
>> Signed-off-by: Laura Abbott <[email protected]>
>> ---
>> scripts/gcc-plugins/stackleak_plugin.c | 5 +++++
>> 1 file changed, 5 insertions(+)
>>
>> diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
>> index 6fc991c98d8b..7dfaa027423f 100644
>> --- a/scripts/gcc-plugins/stackleak_plugin.c
>> +++ b/scripts/gcc-plugins/stackleak_plugin.c
>> @@ -244,6 +244,11 @@ static unsigned int stackleak_final_execute(void)
>> * that insn.
>> */
>> body = PATTERN(insn);
>> + /* arm64 is different */
>> + if (GET_CODE(body) == PARALLEL) {
>> + body = XEXP(body, 0);
>> + body = XEXP(body, 0);
>> + }
>
> Like most kernel developers, I don't know the first thing about GCC internals
> so I asked our GCC team and Richard (CC'd) reckons this should be:
>
> if (GET_CODE(body) == PARALLEL)
> body = XVECEXP(body, 0, 0);
>
> instead of the hunk above. Can you give that a go instead, please?
>
> Cheers,
>
> Will
>
Yep, seems to work fine and makes sense from my understanding of
gcc internals. I'll fix it up for the next version. Thanks for the
review!
Laura
Hi Alexander,
Sorry for the slow reply, been caught up in an office move.
Alexander Popov <[email protected]> writes:
> Hello Will, Richard and GCC folks!
>
> On 22.02.2018 19:58, Will Deacon wrote:
>> On Tue, Feb 20, 2018 at 05:13:02PM -0800, Laura Abbott wrote:
>>>
>>> arm64 has another layer of indirection in the RTL.
>>> Account for this in the plugin.
>>>
>>> Signed-off-by: Laura Abbott <[email protected]>
>>> ---
>>> scripts/gcc-plugins/stackleak_plugin.c | 5 +++++
>>> 1 file changed, 5 insertions(+)
>>>
>>> diff --git a/scripts/gcc-plugins/stackleak_plugin.c b/scripts/gcc-plugins/stackleak_plugin.c
>>> index 6fc991c98d8b..7dfaa027423f 100644
>>> --- a/scripts/gcc-plugins/stackleak_plugin.c
>>> +++ b/scripts/gcc-plugins/stackleak_plugin.c
>>> @@ -244,6 +244,11 @@ static unsigned int stackleak_final_execute(void)
>>> * that insn.
>>> */
>>> body = PATTERN(insn);
>>> + /* arm64 is different */
>>> + if (GET_CODE(body) == PARALLEL) {
>>> + body = XEXP(body, 0);
>>> + body = XEXP(body, 0);
>>> + }
>>
>> Like most kernel developers, I don't know the first thing about GCC internals
>> so I asked our GCC team and Richard (CC'd) reckons this should be:
>>
>> if (GET_CODE(body) == PARALLEL)
>> body = XVECEXP(body, 0, 0);
>>
>> instead of the hunk above. Can you give that a go instead, please?
>
> Thanks a lot!
>
> Would you be so kind to take a look at the whole STACKLEAK plugin?
> http://www.openwall.com/lists/kernel-hardening/2018/02/16/4
> https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git/commit/?h=kspp/gcc-plugin/stackleak&id=57a0a6763b12e82dd462593d0f42be610e93cdc9
>
> It's not very big. I documented it in detail. I would be really grateful for the
> review!
Looks good to me FWIW. Just a couple of minor things:
> + /*
> + * 1. Loop through the GIMPLE statements in each of cfun basic blocks.
> + * cfun is a global variable which represents the function that is
> + * currently processed.
> + */
> + FOR_EACH_BB_FN(bb, cfun) {
> + for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
> + gimple stmt;
> +
> + stmt = gsi_stmt(gsi);
> +
> + /* Leaf function is a function which makes no calls */
> + if (is_gimple_call(stmt))
> + is_leaf = false;
It's probably not going to matter in practice, but it might be worth
emphasising in the comments that this leafness is only approximate.
It will sometimes be a false positive, because we could still
end up creating calls to libgcc functions from non-call statements
(or for target-specific reasons). It can also be a false negative,
since call statements can be to built-in or internal functions that
end up being open-coded.
> + /*
> + * The stackleak_final pass should be executed before the "final" pass,
> + * which turns the RTL (Register Transfer Language) into assembly.
> + */
> + PASS_INFO(stackleak_final, "final", 1, PASS_POS_INSERT_BEFORE);
This might be too late, since it happens e.g. after addresses have
been calculated for branch ranges, and after machine-specific passes
(e.g. bundling on ia64).
The stack size is final after reload, so inserting the pass after that
might be better.
Thanks,
Richard
On 27.02.2018 13:21, Richard Sandiford wrote:
> Hi Alexander,
>
> Sorry for the slow reply, been caught up in an office move.
Thank you very much for the review, Richard!
> Alexander Popov <[email protected]> writes:
>> Would you be so kind to take a look at the whole STACKLEAK plugin?
>> http://www.openwall.com/lists/kernel-hardening/2018/02/16/4
>> https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git/commit/?h=kspp/gcc-plugin/stackleak&id=57a0a6763b12e82dd462593d0f42be610e93cdc9
>>
>> It's not very big. I documented it in detail. I would be really grateful for the
>> review!
>
> Looks good to me FWIW. Just a couple of minor things:
>
>> + /*
>> + * 1. Loop through the GIMPLE statements in each of cfun basic blocks.
>> + * cfun is a global variable which represents the function that is
>> + * currently processed.
>> + */
>> + FOR_EACH_BB_FN(bb, cfun) {
>> + for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
>> + gimple stmt;
>> +
>> + stmt = gsi_stmt(gsi);
>> +
>> + /* Leaf function is a function which makes no calls */
>> + if (is_gimple_call(stmt))
>> + is_leaf = false;
>
> It's probably not going to matter in practice, but it might be worth
> emphasising in the comments that this leafness is only approximate.
That's important, thank you! May I ask why you think it's not going to matter in
practice?
> It will sometimes be a false positive, because we could still
> end up creating calls to libgcc functions from non-call statements
> (or for target-specific reasons). It can also be a false negative,
> since call statements can be to built-in or internal functions that
> end up being open-coded.
Oh, that raises the question: how does this leafness inaccuracy affect in my
particular case?
is_leaf is currently used for finding the special cases to skip the
track_stack() call insertion:
/*
* Special cases to skip the instrumentation.
*
* Taking the address of static inline functions materializes them,
* but we mustn't instrument some of them as the resulting stack
* alignment required by the function call ABI will break other
* assumptions regarding the expected (but not otherwise enforced)
* register clobbering ABI.
*
* Case in point: native_save_fl on amd64 when optimized for size
* clobbers rdx if it were instrumented here.
*
* TODO: any more special cases?
*/
if (is_leaf &&
!TREE_PUBLIC(current_function_decl) &&
DECL_DECLARED_INLINE_P(current_function_decl)) {
return 0;
}
And now it seems to me that the stackleak plugin should not instrument all
static inline functions, regardless of is_leaf. Do you agree?
>> + /*
>> + * The stackleak_final pass should be executed before the "final" pass,
>> + * which turns the RTL (Register Transfer Language) into assembly.
>> + */
>> + PASS_INFO(stackleak_final, "final", 1, PASS_POS_INSERT_BEFORE);
>
> This might be too late, since it happens e.g. after addresses have
> been calculated for branch ranges, and after machine-specific passes
> (e.g. bundling on ia64).
>
> The stack size is final after reload, so inserting the pass after that
> might be better.
Thanks for that notice. May I ask for the additional clarification?
I specified -fdump-passes and see a lot of passes between reload and final:
...
rtl-sched1 : OFF
rtl-ira : ON
rtl-reload : ON
rtl-vzeroupper : OFF
*all-postreload : OFF
rtl-postreload : OFF
rtl-gcse2 : OFF
rtl-split2 : ON
rtl-ree : ON
rtl-cmpelim : OFF
rtl-btl1 : OFF
rtl-pro_and_epilogue : ON
rtl-dse2 : ON
rtl-csa : ON
rtl-jump2 : ON
rtl-compgotos : ON
rtl-sched_fusion : OFF
rtl-peephole2 : ON
rtl-ce3 : ON
rtl-rnreg : OFF
rtl-cprop_hardreg : ON
rtl-rtl_dce : ON
rtl-bbro : ON
rtl-btl2 : OFF
*leaf_regs : ON
rtl-split4 : ON
rtl-sched2 : ON
*stack_regs : ON
rtl-split3 : OFF
rtl-stack : ON
*all-late_compilation : OFF
rtl-alignments : ON
rtl-vartrack : ON
*free_cfg : ON
rtl-mach : ON
rtl-barriers : ON
rtl-dbr : OFF
rtl-split5 : OFF
rtl-eh_ranges : OFF
rtl-shorten : ON
rtl-nothrow : ON
rtl-dwarf2 : ON
rtl-stackleak_final : ON
rtl-final : ON
rtl-dfinish : ON
clean_state : ON
Where exactly would you recommend me to insert the stackleak_final pass, which
removes the unneeded track_stack() calls?
Best regards,
Alexander
Alexander Popov <[email protected]> writes:
> On 27.02.2018 13:21, Richard Sandiford wrote:
>> Hi Alexander,
>>
>> Sorry for the slow reply, been caught up in an office move.
>
> Thank you very much for the review, Richard!
>
>> Alexander Popov <[email protected]> writes:
>>> Would you be so kind to take a look at the whole STACKLEAK plugin?
>>> http://www.openwall.com/lists/kernel-hardening/2018/02/16/4
>>> https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git/commit/?h=kspp/gcc-plugin/stackleak&id=57a0a6763b12e82dd462593d0f42be610e93cdc9
>>>
>>> It's not very big. I documented it in detail. I would be really
>>> grateful for the
>>> review!
>>
>> Looks good to me FWIW. Just a couple of minor things:
>>
>>> + /*
>>> + * 1. Loop through the GIMPLE statements in each of cfun basic blocks.
>>> + * cfun is a global variable which represents the function that is
>>> + * currently processed.
>>> + */
>>> + FOR_EACH_BB_FN(bb, cfun) {
>>> + for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
>>> + gimple stmt;
>>> +
>>> + stmt = gsi_stmt(gsi);
>>> +
>>> + /* Leaf function is a function which makes no calls */
>>> + if (is_gimple_call(stmt))
>>> + is_leaf = false;
>>
>> It's probably not going to matter in practice, but it might be worth
>> emphasising in the comments that this leafness is only approximate.
>
> That's important, thank you! May I ask why you think it's not going to matter in
> practice?
I just thought the kinds of calls it misses are going to have very
shallow frames, but from what you said later I guess that isn't the
point. It also might be a bit too hand-wavy for something like this :-)
>> It will sometimes be a false positive, because we could still
>> end up creating calls to libgcc functions from non-call statements
>> (or for target-specific reasons). It can also be a false negative,
>> since call statements can be to built-in or internal functions that
>> end up being open-coded.
>
> Oh, that raises the question: how does this leafness inaccuracy affect my
> particular case?
>
> is_leaf is currently used for finding the special cases to skip the
> track_stack() call insertion:
>
> /*
> * Special cases to skip the instrumentation.
> *
> * Taking the address of static inline functions materializes them,
> * but we mustn't instrument some of them as the resulting stack
> * alignment required by the function call ABI will break other
> * assumptions regarding the expected (but not otherwise enforced)
> * register clobbering ABI.
> *
> * Case in point: native_save_fl on amd64 when optimized for size
> * clobbers rdx if it were instrumented here.
> *
> * TODO: any more special cases?
> */
> if (is_leaf &&
> !TREE_PUBLIC(current_function_decl) &&
> DECL_DECLARED_INLINE_P(current_function_decl)) {
> return 0;
> }
>
>
> And now it seems to me that the stackleak plugin should not instrument all
> static inline functions, regardless of is_leaf. Do you agree?
OK. I'd missed that this was just a heuristic to detect certain kinds
of linux function, so it's probably fine as it is.
Not sure whether it's safe to punt for general static inline functions.
E.g. couldn't you have a static inline function that just provides a
more convenient interface to another function? But I guess it's a
linux-specific heuristic, so I can't really say.
TBH the paravirt save_fl stuff seems like dancing on the edge,
but that's another story. :-)
>>> + /*
>>> + * The stackleak_final pass should be executed before the "final" pass,
>>> + * which turns the RTL (Register Transfer Language) into assembly.
>>> + */
>>> + PASS_INFO(stackleak_final, "final", 1, PASS_POS_INSERT_BEFORE);
>>
>> This might be too late, since it happens e.g. after addresses have
>> been calculated for branch ranges, and after machine-specific passes
>> (e.g. bundling on ia64).
>>
>> The stack size is final after reload, so inserting the pass after that
>> might be better.
>
> Thanks for that notice. May I ask for the additional clarification?
>
> I specified -fdump-passes and see a lot of passes between reload and final:
> ...
> rtl-sched1 : OFF
> rtl-ira : ON
> rtl-reload : ON
> rtl-vzeroupper : OFF
> *all-postreload : OFF
> rtl-postreload : OFF
> rtl-gcse2 : OFF
> rtl-split2 : ON
> rtl-ree : ON
> rtl-cmpelim : OFF
> rtl-btl1 : OFF
> rtl-pro_and_epilogue : ON
> rtl-dse2 : ON
> rtl-csa : ON
> rtl-jump2 : ON
> rtl-compgotos : ON
> rtl-sched_fusion : OFF
> rtl-peephole2 : ON
> rtl-ce3 : ON
> rtl-rnreg : OFF
> rtl-cprop_hardreg : ON
> rtl-rtl_dce : ON
> rtl-bbro : ON
> rtl-btl2 : OFF
> *leaf_regs : ON
> rtl-split4 : ON
> rtl-sched2 : ON
> *stack_regs : ON
> rtl-split3 : OFF
> rtl-stack : ON
> *all-late_compilation : OFF
> rtl-alignments : ON
> rtl-vartrack : ON
> *free_cfg : ON
> rtl-mach : ON
> rtl-barriers : ON
> rtl-dbr : OFF
> rtl-split5 : OFF
> rtl-eh_ranges : OFF
> rtl-shorten : ON
> rtl-nothrow : ON
> rtl-dwarf2 : ON
> rtl-stackleak_final : ON
> rtl-final : ON
> rtl-dfinish : ON
> clean_state : ON
>
> Where exactly would you recommend me to insert the stackleak_final pass, which
> removes the unneeded track_stack() calls?
Directly after rtl-reload seems best. That's the first point at which
the frame size is final, and reload is one of the few rtl passes that
always runs. Doing it there could also help with things like shrink
wrapping (part of rtl-pro_and_epilogue).
Thanks,
Richard
Thanks for your reply, Richard!
On 01.03.2018 13:33, Richard Sandiford wrote:
> Alexander Popov <[email protected]> writes:
>> On 27.02.2018 13:21, Richard Sandiford wrote:
>>> Alexander Popov <[email protected]> writes:
>>>> Would you be so kind to take a look at the whole STACKLEAK plugin?
>>>> http://www.openwall.com/lists/kernel-hardening/2018/02/16/4
>>>> https://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git/commit/?h=kspp/gcc-plugin/stackleak&id=57a0a6763b12e82dd462593d0f42be610e93cdc9
>>>>
>>>> It's not very big. I documented it in detail. I would be really
>>>> grateful for the
>>>> review!
>>>
>>> Looks good to me FWIW. Just a couple of minor things:
>>>
>>>> + /*
>>>> + * 1. Loop through the GIMPLE statements in each of cfun basic blocks.
>>>> + * cfun is a global variable which represents the function that is
>>>> + * currently processed.
>>>> + */
>>>> + FOR_EACH_BB_FN(bb, cfun) {
>>>> + for (gsi = gsi_start_bb(bb); !gsi_end_p(gsi); gsi_next(&gsi)) {
>>>> + gimple stmt;
>>>> +
>>>> + stmt = gsi_stmt(gsi);
>>>> +
>>>> + /* Leaf function is a function which makes no calls */
>>>> + if (is_gimple_call(stmt))
>>>> + is_leaf = false;
>>>
>>> It's probably not going to matter in practice, but it might be worth
>>> emphasising in the comments that this leafness is only approximate.
>>
>> That's important, thank you! May I ask why you think it's not going to matter in
>> practice?
>
> I just thought the kinds of calls it misses are going to have very
> shallow frames, but from what you said later I guess that isn't the
> point. It also might be a bit too hand-wavy for something like this :-)
>
>>> It will sometimes be a false positive, because we could still
>>> end up creating calls to libgcc functions from non-call statements
>>> (or for target-specific reasons). It can also be a false negative,
>>> since call statements can be to built-in or internal functions that
>>> end up being open-coded.
>>
>> Oh, that raises the question: how does this leafness inaccuracy affect my
>> particular case?
>>
>> is_leaf is currently used for finding the special cases to skip the
>> track_stack() call insertion:
>>
>> /*
>> * Special cases to skip the instrumentation.
>> *
>> * Taking the address of static inline functions materializes them,
>> * but we mustn't instrument some of them as the resulting stack
>> * alignment required by the function call ABI will break other
>> * assumptions regarding the expected (but not otherwise enforced)
>> * register clobbering ABI.
>> *
>> * Case in point: native_save_fl on amd64 when optimized for size
>> * clobbers rdx if it were instrumented here.
>> *
>> * TODO: any more special cases?
>> */
>> if (is_leaf &&
>> !TREE_PUBLIC(current_function_decl) &&
>> DECL_DECLARED_INLINE_P(current_function_decl)) {
>> return 0;
>> }
>>
>>
>> And now it seems to me that the stackleak plugin should not instrument all
>> static inline functions, regardless of is_leaf. Do you agree?
>
> OK. I'd missed that this was just a heuristic to detect certain kinds
> of linux function, so it's probably fine as it is.
>
> Not sure whether it's safe to punt for general static inline functions.
> E.g. couldn't you have a static inline function that just provides a
> more convenient interface to another function? But I guess it's a
> linux-specific heuristic, so I can't really say.
Huh, I got the insight! I think that the current approach (originally by PaX
Team) should work fine despite the false positives which you described:
If some static inline function already does explicit calls (so is_leaf is
false), adding the track_stack() call will not introduce anything special that
can break the aforementioned register clobbering ABI in that function.
Does it sound reasonable?
However, I don't know what to do with false negatives.
> TBH the paravirt save_fl stuff seems like dancing on the edge,
> but that's another story. :-)
That's interesting. Could you elaborate on that?
>>>> + /*
>>>> + * The stackleak_final pass should be executed before the "final" pass,
>>>> + * which turns the RTL (Register Transfer Language) into assembly.
>>>> + */
>>>> + PASS_INFO(stackleak_final, "final", 1, PASS_POS_INSERT_BEFORE);
>>>
>>> This might be too late, since it happens e.g. after addresses have
>>> been calculated for branch ranges, and after machine-specific passes
>>> (e.g. bundling on ia64).
>>>
>>> The stack size is final after reload, so inserting the pass after that
>>> might be better.
>>
>> Thanks for that notice. May I ask for the additional clarification?
>>
>> I specified -fdump-passes and see a lot of passes between reload and final:
...
>>
>> Where exactly would you recommend me to insert the stackleak_final pass, which
>> removes the unneeded track_stack() calls?
>
> Directly after rtl-reload seems best. That's the first point at which
> the frame size is final, and reload is one of the few rtl passes that
> always runs. Doing it there could also help with things like shrink
> wrapping (part of rtl-pro_and_epilogue).
Thanks a lot for your detailed answer. I'll follow your advice in the next
version of the patch series.
Best regards,
Alexander