2020-05-05 14:17:42

by Thomas Gleixner

Subject: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk

The preempt_enable_notrace() ASM thunk is called from tracing, entry code,
RCU and other places which are already in or going to be in the noinstr
section, which protects sensitive code from being instrumented.

Calls out of these sections happen with interrupts disabled, which is
handled in C code, but the push regs, call, pop regs sequence can be
completely avoided in this case.

This is also a preparatory step for annotating the call from the thunk to
preempt_enable_notrace() as safe from a noinstr section.

Signed-off-by: Thomas Gleixner <[email protected]>
---
arch/x86/entry/thunk_64.S | 27 +++++++++++++++++++++++----
arch/x86/include/asm/irqflags.h | 3 +--
arch/x86/include/asm/paravirt.h | 3 +--
3 files changed, 25 insertions(+), 8 deletions(-)

--- a/arch/x86/entry/thunk_64.S
+++ b/arch/x86/entry/thunk_64.S
@@ -9,10 +9,28 @@
#include "calling.h"
#include <asm/asm.h>
#include <asm/export.h>
+#include <asm/irqflags.h>
+
+.code64

/* rdi: arg1 ... normal C conventions. rax is saved/restored. */
- .macro THUNK name, func, put_ret_addr_in_rdi=0
+ .macro THUNK name, func, put_ret_addr_in_rdi=0, check_if=0
SYM_FUNC_START_NOALIGN(\name)
+
+ .if \check_if
+ /*
+ * Check for interrupts disabled right here. No point in
+ * going all the way down
+ */
+ pushq %rax
+ SAVE_FLAGS(CLBR_RAX)
+ testl $X86_EFLAGS_IF, %eax
+ popq %rax
+ jnz 1f
+ ret
+1:
+ .endif
+
pushq %rbp
movq %rsp, %rbp

@@ -38,14 +56,15 @@ SYM_FUNC_END(\name)
.endm

#ifdef CONFIG_TRACE_IRQFLAGS
- THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1
- THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
+ THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller, put_ret_addr_in_rdi=1
+ THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller, put_ret_addr_in_rdi=1
#endif

#ifdef CONFIG_PREEMPTION
THUNK preempt_schedule_thunk, preempt_schedule
- THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
EXPORT_SYMBOL(preempt_schedule_thunk)
+
+ THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace, check_if=1
EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
#endif

--- a/arch/x86/include/asm/irqflags.h
+++ b/arch/x86/include/asm/irqflags.h
@@ -127,9 +127,8 @@ static inline notrace unsigned long arch
#define DISABLE_INTERRUPTS(x) cli

#ifdef CONFIG_X86_64
-#ifdef CONFIG_DEBUG_ENTRY
+
#define SAVE_FLAGS(x) pushfq; popq %rax
-#endif

#define SWAPGS swapgs
/*
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -907,14 +907,13 @@ extern void default_banner(void);
ANNOTATE_RETPOLINE_SAFE; \
jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);)

-#ifdef CONFIG_DEBUG_ENTRY
#define SAVE_FLAGS(clobbers) \
PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \
PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
ANNOTATE_RETPOLINE_SAFE; \
call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
-#endif
+
#endif /* CONFIG_PARAVIRT_XXL */
#endif /* CONFIG_X86_64 */
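
In C terms, the fast path that the check_if=1 thunk adds is roughly the
following (a minimal sketch for illustration, not the actual
implementation; the real check runs in assembly before the push regs,
call, pop regs sequence, and the SAVE_FLAGS() changes above merely make
the flags-read macro available unconditionally instead of only under
CONFIG_DEBUG_ENTRY):

	/*
	 * Hypothetical C sketch of the check_if path in the thunk above.
	 * The real code does this test in assembly before building the
	 * call frame, so a call with interrupts disabled costs little
	 * more than a ret.
	 */
	void preempt_schedule_notrace_thunk_sketch(void)
	{
		unsigned long flags;

		raw_local_save_flags(flags);	/* SAVE_FLAGS(CLBR_RAX)       */
		if (!(flags & X86_EFLAGS_IF))	/* testl $X86_EFLAGS_IF, %eax */
			return;			/* ret                        */

		preempt_schedule_notrace();	/* the full thunk body        */
	}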



2020-05-07 14:21:55

by Alexandre Chartre

Subject: Re: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk



On 5/5/20 3:41 PM, Thomas Gleixner wrote:
> The preempt_enable_notrace() ASM thunk is called from tracing, entry code
> RCU and other places which are already in or going to be in the noinstr
> section which protects sensitve code from being instrumented.

typo: "sensitve"

alex.

> Calls out of these sections happen with interrupts disabled, which is
> handled in C code, but the push regs, call, pop regs sequence can be
> completely avoided in this case.
>
> This is also a preparatory step for annotating the call from the thunk to
> preempt_enable_notrace() safe from a noinstr section.
>
> Signed-off-by: Thomas Gleixner <[email protected]>
> ---
> arch/x86/entry/thunk_64.S | 27 +++++++++++++++++++++++----
> arch/x86/include/asm/irqflags.h | 3 +--
> arch/x86/include/asm/paravirt.h | 3 +--
> 3 files changed, 25 insertions(+), 8 deletions(-)
>
> --- a/arch/x86/entry/thunk_64.S
> +++ b/arch/x86/entry/thunk_64.S
> @@ -9,10 +9,28 @@
> #include "calling.h"
> #include <asm/asm.h>
> #include <asm/export.h>
> +#include <asm/irqflags.h>
> +
> +.code64
>
> /* rdi: arg1 ... normal C conventions. rax is saved/restored. */
> - .macro THUNK name, func, put_ret_addr_in_rdi=0
> + .macro THUNK name, func, put_ret_addr_in_rdi=0, check_if=0
> SYM_FUNC_START_NOALIGN(\name)
> +
> + .if \check_if
> + /*
> + * Check for interrupts disabled right here. No point in
> + * going all the way down
> + */
> + pushq %rax
> + SAVE_FLAGS(CLBR_RAX)
> + testl $X86_EFLAGS_IF, %eax
> + popq %rax
> + jnz 1f
> + ret
> +1:
> + .endif
> +
> pushq %rbp
> movq %rsp, %rbp
>
> @@ -38,14 +56,15 @@ SYM_FUNC_END(\name)
> .endm
>
> #ifdef CONFIG_TRACE_IRQFLAGS
> - THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1
> - THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1
> + THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller, put_ret_addr_in_rdi=1
> + THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller, put_ret_addr_in_rdi=1
> #endif
>
> #ifdef CONFIG_PREEMPTION
> THUNK preempt_schedule_thunk, preempt_schedule
> - THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace
> EXPORT_SYMBOL(preempt_schedule_thunk)
> +
> + THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace, check_if=1
> EXPORT_SYMBOL(preempt_schedule_notrace_thunk)
> #endif
>
> --- a/arch/x86/include/asm/irqflags.h
> +++ b/arch/x86/include/asm/irqflags.h
> @@ -127,9 +127,8 @@ static inline notrace unsigned long arch
> #define DISABLE_INTERRUPTS(x) cli
>
> #ifdef CONFIG_X86_64
> -#ifdef CONFIG_DEBUG_ENTRY
> +
> #define SAVE_FLAGS(x) pushfq; popq %rax
> -#endif
>
> #define SWAPGS swapgs
> /*
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -907,14 +907,13 @@ extern void default_banner(void);
> ANNOTATE_RETPOLINE_SAFE; \
> jmp PARA_INDIRECT(pv_ops+PV_CPU_usergs_sysret64);)
>
> -#ifdef CONFIG_DEBUG_ENTRY
> #define SAVE_FLAGS(clobbers) \
> PARA_SITE(PARA_PATCH(PV_IRQ_save_fl), \
> PV_SAVE_REGS(clobbers | CLBR_CALLEE_SAVE); \
> ANNOTATE_RETPOLINE_SAFE; \
> call PARA_INDIRECT(pv_ops+PV_IRQ_save_fl); \
> PV_RESTORE_REGS(clobbers | CLBR_CALLEE_SAVE);)
> -#endif
> +
> #endif /* CONFIG_PARAVIRT_XXL */
> #endif /* CONFIG_X86_64 */
>
>

2020-05-09 00:13:02

by Andy Lutomirski

Subject: Re: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk

On Tue, May 5, 2020 at 7:14 AM Thomas Gleixner <[email protected]> wrote:
>
> The preempt_enable_notrace() ASM thunk is called from tracing, entry code
> RCU and other places which are already in or going to be in the noinstr
> section which protects sensitve code from being instrumented.

This text and $SUBJECT agree that you're talking about
preempt_enable_notrace(), but:

> + THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace, check_if=1

You actually seem to be changing preempt_schedule_notrace().

The actual code in question has this comment:

/**
* preempt_schedule_notrace - preempt_schedule called by tracing
*
* The tracing infrastructure uses preempt_enable_notrace to prevent
* recursion and tracing preempt enabling caused by the tracing
* infrastructure itself. But as tracing can happen in areas coming
* from userspace or just about to enter userspace, a preempt enable
* can occur before user_exit() is called. This will cause the scheduler
* to be called when the system is still in usermode.
*
* To prevent this, the preempt_enable_notrace will use this function
* instead of preempt_schedule() to exit user context if needed before
* calling the scheduler.
*/

Which is no longer really applicable to x86 -- in the state that this
comment nonsensically refers to as "userspace", x86 *always* has IRQs
off, which means that preempt_enable() will not schedule.
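
For reference, simplified from include/linux/preempt.h and
kernel/sched/core.c of that era (not verbatim): preempt_enable() only
reschedules via preempt_schedule(), which bails out unless
preemptible(), i.e. unless preempt_count() is zero *and* IRQs are on:

	#define preemptible()	(preempt_count() == 0 && !irqs_disabled())

	asmlinkage __visible void __sched notrace preempt_schedule(void)
	{
		/* With IRQs off, preemptible() is false: nothing to do. */
		if (likely(!preemptible()))
			return;
		preempt_schedule_common();
	}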

So I'm guessing that the issue you're solving is that we have
redundant preempt disable/enable pairs somewhere in the bowels of
tracing code that is called with IRQs off, and objtool is now
complaining. Could the actual code in question be fixed to assert
that IRQs are off instead of disabling preemption? If not, can you
fix the $SUBJECT and changelog and perhaps add a comment to the code
as to *why* you're checking IF? Otherwise some intrepid programmer is
going to notice it down the road, wonder if it's optimizing anything
useful at all, and get rid of it.

2020-05-09 10:28:37

by Thomas Gleixner

Subject: Re: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk

Andy Lutomirski <[email protected]> writes:
> On Tue, May 5, 2020 at 7:14 AM Thomas Gleixner <[email protected]> wrote:
>>
>> The preempt_enable_notrace() ASM thunk is called from tracing, entry code
>> RCU and other places which are already in or going to be in the noinstr
>> section which protects sensitve code from being instrumented.
>
> This text and $SUBJECT agree that you're talking about
> preempt_enable_notrace(), but:
>
>> + THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace, check_if=1
>
> You actually seem to be changing preempt_schedule_notrace().

Duh, yes.

> The actual code in question has this comment:
>
> /**
> * preempt_schedule_notrace - preempt_schedule called by tracing
> *
> * The tracing infrastructure uses preempt_enable_notrace to prevent
> * recursion and tracing preempt enabling caused by the tracing
> * infrastructure itself. But as tracing can happen in areas coming
> * from userspace or just about to enter userspace, a preempt enable
> * can occur before user_exit() is called. This will cause the scheduler
> * to be called when the system is still in usermode.
> *
> * To prevent this, the preempt_enable_notrace will use this function
> * instead of preempt_schedule() to exit user context if needed before
> * calling the scheduler.
> */
>
> Which is no longer really applicable to x86 -- in the state that this
> comment nonsensically refers to as "userspace", x86 *always* has IRQs
> off, which means that preempt_enable() will not schedule.
>
> So I'm guessing that the issue you're solving is that we have
> redundant preempt disable/enable pairs somewhere in the bowels of
> tracing code that is called with IRQs off, and objtool is now
> complaining. Could the actual code in question be fixed to assert
> that IRQs are off instead of disabling preemption? If not, can you
> fix the $SUBJECT and changelog and perhaps add a comment to the code
> as to *why* you're checking IF? Otherwise some intrepid programmer is
> going to notice it down the road, wonder if it's optimizing anything
> useful at all, and get rid of it.

Let me stare into that again.

Thanks,

tglx

2020-05-10 18:51:40

by Thomas Gleixner

Subject: Re: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk

Thomas Gleixner <[email protected]> writes:
> Andy Lutomirski <[email protected]> writes:
>> On Tue, May 5, 2020 at 7:14 AM Thomas Gleixner <[email protected]> wrote:
>> /**
>> * preempt_schedule_notrace - preempt_schedule called by tracing
>> *
>> * The tracing infrastructure uses preempt_enable_notrace to prevent
>> * recursion and tracing preempt enabling caused by the tracing
>> * infrastructure itself. But as tracing can happen in areas coming
>> * from userspace or just about to enter userspace, a preempt enable
>> * can occur before user_exit() is called. This will cause the scheduler
>> * to be called when the system is still in usermode.
>> *
>> * To prevent this, the preempt_enable_notrace will use this function
>> * instead of preempt_schedule() to exit user context if needed before
>> * calling the scheduler.
>> */
>>
>> Which is no longer really applicable to x86 -- in the state that this
>> comment nonsensically refers to as "userspace", x86 *always* has IRQs
>> off, which means that preempt_enable() will not schedule.

Yeah.

>> So I'm guessing that the issue you're solving is that we have
>> redundant preempt disable/enable pairs somewhere in the bowels of
>> tracing code that is called with IRQs off, and objtool is now
>> complaining. Could the actual code in question be fixed to assert
>> that IRQs are off instead of disabling preemption? If not, can you
>> fix the $SUBJECT and changelog and perhaps add a comment to the code
>> as to *why* you're checking IF? Otherwise some intrepid programmer is
>> going to notice it down the road, wonder if it's optimizing anything
>> useful at all, and get rid of it.
>
> Let me stare into that again.

There are a few preempt_disable/enable() pairs in some of the helper
functions which are called in various places. That means we would have
to chase all of them and provide 'naked' helpers for these particular
call chains. I'll fix the changelog and add a comment to make clear what
this is about.

Thanks,

tglx

2020-05-11 18:29:30

by Thomas Gleixner

Subject: Re: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk

Thomas Gleixner <[email protected]> writes:
> Thomas Gleixner <[email protected]> writes:
>> Let me stare into that again.
>
> There are a few preempt_disable/enable() pairs in some of the helper
> functions which are called in various places. That means we would have
> to chase all of them and provide 'naked' helpers for these particular
> call chains. I'll fix the changelog and add a comment to make clear what
> this is about.

I actually sat down and chased it. It's mostly the tracing code - again,
particularly the hardware latency tracer. There is really no point in
invoking that from the guts of nmi_enter() and nmi_exit().

For neither #DB, #BP nor #MCE is there a reason to invoke it at all. If
someone does hardware latency analysis, then #DB and #BP should not be
in use at all. If they are, shrug. If #MCE hits, then the hardware
induced latency is the least of the worries.

So the only relevant place is actually NMI, which wants to be tracked to
avoid false positives. But that tracking really can wait until the NMI
has actually reached a halfway stable state.

The other place which has preempt_disable/enable_notrace() in it is
rcu_is_watching(), but it's trivial enough to provide a naked version of
that.
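
For illustration, the rcu_is_watching() of that era wrapped its check in
a preempt_disable_notrace()/preempt_enable_notrace() pair; a naked
variant could simply drop the pair for callers which already run with
preemption or interrupts disabled (sketch only, the name is
hypothetical):

	static bool notrace __rcu_is_watching_naked(void)
	{
		/* Caller guarantees preemption/interrupts are off. */
		return !rcu_dynticks_curr_cpu_in_eqs();
	}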

Thanks,

tglx

8<------------------
Subject: nmi, tracing: Provide nmi_enter/exit_notrace()
From: Thomas Gleixner <[email protected]>
Date: Mon, 11 May 2020 10:57:16 +0200

To fully isolate #DB and #BP from instrumentable code, it's necessary to
avoid invoking the hardware latency tracer on nmi_enter/exit().

Provide nmi_enter/exit() variants which do not invoke the hardware
latency tracer. That allows the calls to be placed explicitly at the
call sites, outside of the kprobe handling.

Signed-off-by: Thomas Gleixner <[email protected]>
---
V5: New patch
---
include/linux/hardirq.h | 18 ++++++++++++++----
1 file changed, 14 insertions(+), 4 deletions(-)

--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -77,28 +77,38 @@ extern void irq_exit(void);
/*
* nmi_enter() can nest up to 15 times; see NMI_BITS.
*/
-#define nmi_enter() \
+#define nmi_enter_notrace() \
do { \
arch_nmi_enter(); \
printk_nmi_enter(); \
lockdep_off(); \
- ftrace_nmi_enter(); \
BUG_ON(in_nmi() == NMI_MASK); \
__preempt_count_add(NMI_OFFSET + HARDIRQ_OFFSET); \
rcu_nmi_enter(); \
lockdep_hardirq_enter(); \
} while (0)

-#define nmi_exit() \
+#define nmi_enter() \
+ do { \
+ nmi_enter_notrace(); \
+ ftrace_nmi_enter(); \
+ } while (0)
+
+#define nmi_exit_notrace() \
do { \
lockdep_hardirq_exit(); \
rcu_nmi_exit(); \
BUG_ON(!in_nmi()); \
__preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \
- ftrace_nmi_exit(); \
lockdep_on(); \
printk_nmi_exit(); \
arch_nmi_exit(); \
} while (0)

+#define nmi_exit() \
+ do { \
+ ftrace_nmi_exit(); \
+ nmi_exit_notrace(); \
+ } while (0)
+
#endif /* LINUX_HARDIRQ_H */
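
A hypothetical call site sketch (the handler name and shape are
illustrative, not part of this patch): an exception entry which must
stay invisible to the hardware latency tracer uses the _notrace
variants and leaves the ftrace calls to be added explicitly at a safe
point, if at all:

	dotraplinkage void notrace do_debug_sketch(struct pt_regs *regs,
						   long error_code)
	{
		nmi_enter_notrace();	/* no ftrace_nmi_enter() */
		/* ... handle #DB without instrumentable calls ... */
		nmi_exit_notrace();	/* no ftrace_nmi_exit() */
	}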

2020-05-12 01:50:40

by Steven Rostedt

Subject: Re: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk

On Fri, 8 May 2020 17:10:09 -0700
Andy Lutomirski <[email protected]> wrote:

> On Tue, May 5, 2020 at 7:14 AM Thomas Gleixner <[email protected]> wrote:
> >
> > The preempt_enable_notrace() ASM thunk is called from tracing, entry code
> > RCU and other places which are already in or going to be in the noinstr
> > section which protects sensitve code from being instrumented.
>
> This text and $SUBJECT agree that you're talking about
> preempt_enable_notrace(), but:
>
> > + THUNK preempt_schedule_notrace_thunk, preempt_schedule_notrace, check_if=1
>
> You actually seem to be changing preempt_schedule_notrace().
>
> The actual code in question has this comment:
>
> /**
> * preempt_schedule_notrace - preempt_schedule called by tracing
> *
> * The tracing infrastructure uses preempt_enable_notrace to prevent
> * recursion and tracing preempt enabling caused by the tracing
> * infrastructure itself. But as tracing can happen in areas coming
> * from userspace or just about to enter userspace, a preempt enable
> * can occur before user_exit() is called. This will cause the scheduler
> * to be called when the system is still in usermode.
> *
> * To prevent this, the preempt_enable_notrace will use this function
> * instead of preempt_schedule() to exit user context if needed before
> * calling the scheduler.
> */
>
> Which is no longer really applicable to x86 -- in the state that this
> comment nonsensically refers to as "userspace", x86 *always* has IRQs
> off, which means that preempt_enable() will not schedule.
>
> So I'm guessing that the issue you're solving is that we have
> redundant preempt disable/enable pairs somewhere in the bowels of
> tracing code that is called with IRQs off, and objtool is now
> complaining. Could the actual code in question be fixed to assert
> that IRQs are off instead of disabling preemption? If not, can you
> fix the $SUBJECT and changelog and perhaps add a comment to the code
> as to *why* you're checking IF? Otherwise some intrepid programmer is
> going to notice it down the road, wonder if it's optimizing anything
> useful at all, and get rid of it.

The commit that added that code is this:

29bb9e5a75684106a37593ad75ec75ff8312731b

And it may not be applicable anymore, especially after Thomas's
patches. I'll go and stare at that some more. A lot has changed since
2013 ;-)

-- Steve

2020-05-12 01:53:18

by Steven Rostedt

Subject: Re: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk

On Tue, 05 May 2020 15:41:22 +0200
Thomas Gleixner <[email protected]> wrote:

> The preempt_enable_notrace() ASM thunk is called from tracing, entry code
> RCU and other places which are already in or going to be in the noinstr
> section which protects sensitve code from being instrumented.
>
> Calls out of these sections happen with interrupts disabled, which is
> handled in C code, but the push regs, call, pop regs sequence can be
> completely avoided in this case.
>
> This is also a preparatory step for annotating the call from the thunk to
> preempt_enable_notrace() safe from a noinstr section.
>

BTW, after applying this patch, I get the following error:

/work/git/linux-test.git/arch/x86/entry/thunk_64.S: Assembler messages:
/work/git/linux-test.git/arch/x86/entry/thunk_64.S:67: Error: invalid operands (*UND* and *UND* sections) for `+'
/work/git/linux-test.git/arch/x86/entry/thunk_64.S:67: Error: invalid operands (*UND* and *ABS* sections) for `/'
make[3]: *** [/work/git/linux-test.git/scripts/Makefile.build:349: arch/x86/entry/thunk_64.o] Error 1
make[3]: *** Waiting for unfinished jobs....

Config attached.

-- Steve


Attachments:
(No filename) (1.15 kB)
config.gz (33.84 kB)

2020-05-12 08:16:32

by Thomas Gleixner

Subject: Re: [patch V4 part 2 10/18] x86/entry/64: Check IF in __preempt_enable_notrace() thunk

Steven Rostedt <[email protected]> writes:
> On Tue, 05 May 2020 15:41:22 +0200
> Thomas Gleixner <[email protected]> wrote:
>
>> The preempt_enable_notrace() ASM thunk is called from tracing, entry code
>> RCU and other places which are already in or going to be in the noinstr
>> section which protects sensitve code from being instrumented.
>>
>> Calls out of these sections happen with interrupts disabled, which is
>> handled in C code, but the push regs, call, pop regs sequence can be
>> completely avoided in this case.
>>
>> This is also a preparatory step for annotating the call from the thunk to
>> preempt_enable_notrace() safe from a noinstr section.
>>
>
> BTW, after applying this patch, I get the following error:
>
> /work/git/linux-test.git/arch/x86/entry/thunk_64.S: Assembler messages:
> /work/git/linux-test.git/arch/x86/entry/thunk_64.S:67: Error: invalid operands (*UND* and *UND* sections) for `+'
> /work/git/linux-test.git/arch/x86/entry/thunk_64.S:67: Error: invalid operands (*UND* and *ABS* sections) for `/'
> make[3]: *** [/work/git/linux-test.git/scripts/Makefile.build:349: arch/x86/entry/thunk_64.o] Error 1
> make[3]: *** Waiting for unfinished jobs....

Yes, I know, but I'm going to drop that patch completely.