2022-04-27 17:58:03

by Mark Rutland

[permalink] [raw]
Subject: [PATCH v2 02/13] stackleak: move skip_erasing() check earlier

In stackleak_erase() we check skip_erasing() after accessing some fields
from current. As generating the address of current uses asm which
hazards with the static branch asm, this work is always performed, even
when the static branch is patched to jump to the return a the end of the
function.

This patch avoids this redundant work by moving the skip_erasing() check
earlier.

To avoid complicating initialization within stackleak_erase(), the body
of the function is split out into a __stackleak_erase() helper, with the
check left in a wrapper function. The __stackleak_erase() helper is
marked __always_inline to ensure that this is inlined into
stackleak_erase() and not instrumented.

Before this patch, on x86-64 w/ GCC 11.1.0 the start of the function is:

<stackleak_erase>:
65 48 8b 04 25 00 00 mov %gs:0x0,%rax
00 00
48 8b 48 20 mov 0x20(%rax),%rcx
48 8b 80 98 0a 00 00 mov 0xa98(%rax),%rax
66 90 xchg %ax,%ax <------------ static branch
48 89 c2 mov %rax,%rdx
48 29 ca sub %rcx,%rdx
48 81 fa ff 3f 00 00 cmp $0x3fff,%rdx

After this patch, on x86-64 w/ GCC 11.1.0 the start of the function is:

<stackleak_erase>:
0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) <--- static branch
65 48 8b 04 25 00 00 mov %gs:0x0,%rax
00 00
48 8b 48 20 mov 0x20(%rax),%rcx
48 8b 80 98 0a 00 00 mov 0xa98(%rax),%rax
48 89 c2 mov %rax,%rdx
48 29 ca sub %rcx,%rdx
48 81 fa ff 3f 00 00 cmp $0x3fff,%rdx

Before this patch, on arm64 w/ GCC 11.1.0 the start of the function is:

<stackleak_erase>:
d503245f bti c
d5384100 mrs x0, sp_el0
f9401003 ldr x3, [x0, #32]
f9451000 ldr x0, [x0, #2592]
d503201f nop <------------------------------- static branch
d503233f paciasp
cb030002 sub x2, x0, x3
d287ffe1 mov x1, #0x3fff
eb01005f cmp x2, x1

After this patch, on arm64 w/ GCC 11.1.0 the start of the function is:

<stackleak_erase>:
d503245f bti c
d503201f nop <------------------------------- static branch
d503233f paciasp
d5384100 mrs x0, sp_el0
f9401003 ldr x3, [x0, #32]
d287ffe1 mov x1, #0x3fff
f9451000 ldr x0, [x0, #2592]
cb030002 sub x2, x0, x3
eb01005f cmp x2, x1

While this may not be a huge win on its own, moving the static branch
will permit further optimization of the body of the function in
subsequent patches.

Signed-off-by: Mark Rutland <[email protected]>
Cc: Alexander Popov <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Kees Cook <[email protected]>
---
kernel/stackleak.c | 13 +++++++++----
1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/kernel/stackleak.c b/kernel/stackleak.c
index ddb5a7f48d69e..753eab797a04d 100644
--- a/kernel/stackleak.c
+++ b/kernel/stackleak.c
@@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
#define skip_erasing() false
#endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */

-asmlinkage void noinstr stackleak_erase(void)
+static __always_inline void __stackleak_erase(void)
{
/* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
unsigned long kstack_ptr = current->lowest_stack;
@@ -78,9 +78,6 @@ asmlinkage void noinstr stackleak_erase(void)
unsigned int poison_count = 0;
const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);

- if (skip_erasing())
- return;
-
/* Check that 'lowest_stack' value is sane */
if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
kstack_ptr = boundary;
@@ -125,6 +122,14 @@ asmlinkage void noinstr stackleak_erase(void)
current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
}

+asmlinkage void noinstr stackleak_erase(void)
+{
+ if (skip_erasing())
+ return;
+
+ __stackleak_erase();
+}
+
void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
{
unsigned long sp = current_stack_pointer;
--
2.30.2


2022-05-09 07:45:16

by Alexander Popov

[permalink] [raw]
Subject: Re: [PATCH v2 02/13] stackleak: move skip_erasing() check earlier

On 27.04.2022 20:31, Mark Rutland wrote:
> In stackleak_erase() we check skip_erasing() after accessing some fields
> from current. As generating the address of current uses asm which
> hazards with the static branch asm, this work is always performed, even
> when the static branch is patched to jump to the return a the end of the
> function.

Nice find!

> This patch avoids this redundant work by moving the skip_erasing() check
> earlier.
>
> To avoid complicating initialization within stackleak_erase(), the body
> of the function is split out into a __stackleak_erase() helper, with the
> check left in a wrapper function. The __stackleak_erase() helper is
> marked __always_inline to ensure that this is inlined into
> stackleak_erase() and not instrumented.
>
> Before this patch, on x86-64 w/ GCC 11.1.0 the start of the function is:
>
> <stackleak_erase>:
> 65 48 8b 04 25 00 00 mov %gs:0x0,%rax
> 00 00
> 48 8b 48 20 mov 0x20(%rax),%rcx
> 48 8b 80 98 0a 00 00 mov 0xa98(%rax),%rax
> 66 90 xchg %ax,%ax <------------ static branch
> 48 89 c2 mov %rax,%rdx
> 48 29 ca sub %rcx,%rdx
> 48 81 fa ff 3f 00 00 cmp $0x3fff,%rdx
>
> After this patch, on x86-64 w/ GCC 11.1.0 the start of the function is:
>
> <stackleak_erase>:
> 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) <--- static branch
> 65 48 8b 04 25 00 00 mov %gs:0x0,%rax
> 00 00
> 48 8b 48 20 mov 0x20(%rax),%rcx
> 48 8b 80 98 0a 00 00 mov 0xa98(%rax),%rax
> 48 89 c2 mov %rax,%rdx
> 48 29 ca sub %rcx,%rdx
> 48 81 fa ff 3f 00 00 cmp $0x3fff,%rdx
>
> Before this patch, on arm64 w/ GCC 11.1.0 the start of the function is:
>
> <stackleak_erase>:
> d503245f bti c
> d5384100 mrs x0, sp_el0
> f9401003 ldr x3, [x0, #32]
> f9451000 ldr x0, [x0, #2592]
> d503201f nop <------------------------------- static branch
> d503233f paciasp
> cb030002 sub x2, x0, x3
> d287ffe1 mov x1, #0x3fff
> eb01005f cmp x2, x1
>
> After this patch, on arm64 w/ GCC 11.1.0 the start of the function is:
>
> <stackleak_erase>:
> d503245f bti c
> d503201f nop <------------------------------- static branch
> d503233f paciasp
> d5384100 mrs x0, sp_el0
> f9401003 ldr x3, [x0, #32]
> d287ffe1 mov x1, #0x3fff
> f9451000 ldr x0, [x0, #2592]
> cb030002 sub x2, x0, x3
> eb01005f cmp x2, x1
>
> While this may not be a huge win on its own, moving the static branch
> will permit further optimization of the body of the function in
> subsequent patches.
>
> Signed-off-by: Mark Rutland <[email protected]>
> Cc: Alexander Popov <[email protected]>
> Cc: Andrew Morton <[email protected]>
> Cc: Andy Lutomirski <[email protected]>
> Cc: Kees Cook <[email protected]>
> ---
> kernel/stackleak.c | 13 +++++++++----
> 1 file changed, 9 insertions(+), 4 deletions(-)
>
> diff --git a/kernel/stackleak.c b/kernel/stackleak.c
> index ddb5a7f48d69e..753eab797a04d 100644
> --- a/kernel/stackleak.c
> +++ b/kernel/stackleak.c
> @@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
> #define skip_erasing() false
> #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
>
> -asmlinkage void noinstr stackleak_erase(void)
> +static __always_inline void __stackleak_erase(void)

Are you sure that __stackleak_erase() doesn't need asmlinkage and noinstr as well?

> {
> /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
> unsigned long kstack_ptr = current->lowest_stack;
> @@ -78,9 +78,6 @@ asmlinkage void noinstr stackleak_erase(void)
> unsigned int poison_count = 0;
> const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
>
> - if (skip_erasing())
> - return;
> -
> /* Check that 'lowest_stack' value is sane */
> if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
> kstack_ptr = boundary;
> @@ -125,6 +122,14 @@ asmlinkage void noinstr stackleak_erase(void)
> current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
> }
>
> +asmlinkage void noinstr stackleak_erase(void)
> +{
> + if (skip_erasing())
> + return;
> +
> + __stackleak_erase();
> +}
> +
> void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
> {
> unsigned long sp = current_stack_pointer;


2022-05-10 13:37:24

by Mark Rutland

[permalink] [raw]
Subject: Re: [PATCH v2 02/13] stackleak: move skip_erasing() check earlier

On Sun, May 08, 2022 at 08:44:56PM +0300, Alexander Popov wrote:
> On 27.04.2022 20:31, Mark Rutland wrote:
> > In stackleak_erase() we check skip_erasing() after accessing some fields
> > from current. As generating the address of current uses asm which
> > hazards with the static branch asm, this work is always performed, even
> > when the static branch is patched to jump to the return a the end of the
> > function.
>
> Nice find!
>
> > This patch avoids this redundant work by moving the skip_erasing() check
> > earlier.
> >
> > To avoid complicating initialization within stackleak_erase(), the body
> > of the function is split out into a __stackleak_erase() helper, with the
> > check left in a wrapper function. The __stackleak_erase() helper is
> > marked __always_inline to ensure that this is inlined into
> > stackleak_erase() and not instrumented.

[...]

> > diff --git a/kernel/stackleak.c b/kernel/stackleak.c
> > index ddb5a7f48d69e..753eab797a04d 100644
> > --- a/kernel/stackleak.c
> > +++ b/kernel/stackleak.c
> > @@ -70,7 +70,7 @@ late_initcall(stackleak_sysctls_init);
> > #define skip_erasing() false
> > #endif /* CONFIG_STACKLEAK_RUNTIME_DISABLE */
> > -asmlinkage void noinstr stackleak_erase(void)
> > +static __always_inline void __stackleak_erase(void)
>
> Are you sure that __stackleak_erase() doesn't need asmlinkage and noinstr as well?

I am certain it needs neither.

It's static and never called from asm, so it doesn't need `asmlinkage`.

It's marked `__always_inline`, so it will always be inlined into its caller (or
if the compiler cannot inline it, will result in a compiler error).

That's important to get good codegen (especially with the on/off stack variants
later in the series), and when inlined into its caller the compiler will treat
it as part of its caller for code generation, so the caller's `noinstr` takes
effect.

Thanks,
Mark.

>
> > {
> > /* It would be nice not to have 'kstack_ptr' and 'boundary' on stack */
> > unsigned long kstack_ptr = current->lowest_stack;
> > @@ -78,9 +78,6 @@ asmlinkage void noinstr stackleak_erase(void)
> > unsigned int poison_count = 0;
> > const unsigned int depth = STACKLEAK_SEARCH_DEPTH / sizeof(unsigned long);
> > - if (skip_erasing())
> > - return;
> > -
> > /* Check that 'lowest_stack' value is sane */
> > if (unlikely(kstack_ptr - boundary >= THREAD_SIZE))
> > kstack_ptr = boundary;
> > @@ -125,6 +122,14 @@ asmlinkage void noinstr stackleak_erase(void)
> > current->lowest_stack = current_top_of_stack() - THREAD_SIZE/64;
> > }
> > +asmlinkage void noinstr stackleak_erase(void)
> > +{
> > + if (skip_erasing())
> > + return;
> > +
> > + __stackleak_erase();
> > +}
> > +
> > void __used __no_caller_saved_registers noinstr stackleak_track_stack(void)
> > {
> > unsigned long sp = current_stack_pointer;
>
>
> _______________________________________________
> linux-arm-kernel mailing list
> [email protected]
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel