In IRQ/NMI induced VM exits, KVM VMX needs to execute the respective
handlers, which requires the software to create a FRED stack frame,
and use it to invoke the handlers. Add fred_entry_from_kvm() for
this job.
Export asm_fred_entry_from_kvm(), the asm helper it calls, because VMX can be compiled as a module.
Suggested-by: Sean Christopherson <[email protected]>
Tested-by: Shan Kang <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Signed-off-by: Xin Li <[email protected]>
---
Changes since v9:
* Shove the whole thing into arch/x86/entry/entry_64_fred.S for invoking
external_interrupt() and fred_exc_nmi() (Sean Christopherson).
* Correct and improve a few comments (Sean Christopherson).
* Merge the two IRQ/NMI asm entries into one as it's fine to invoke
noinstr code from regular code (Thomas Gleixner).
* Setup the long mode and NMI flags in the augmented SS field of FRED
stack frame in C instead of asm (Thomas Gleixner).
* Add UNWIND_HINT_{SAVE,RESTORE} to get rid of the warning: "objtool:
asm_fred_entry_from_kvm+0x0: unreachable instruction" (Peter Zijlstra).
Changes since v8:
* Add a new macro VMX_DO_FRED_EVENT_IRQOFF for FRED instead of
refactoring VMX_DO_EVENT_IRQOFF (Sean Christopherson).
* Do NOT use a trampoline, just LEA+PUSH the return RIP, PUSH the error
code, and jump to the FRED kernel entry point for NMI or call
external_interrupt() for IRQs (Sean Christopherson).
* Call external_interrupt() only when FRED is enabled, and convert the
non-FRED handling to external_interrupt() after FRED lands (Sean
Christopherson).
---
arch/x86/entry/entry_64_fred.S | 73 ++++++++++++++++++++++++++++++++++
arch/x86/entry/entry_fred.c | 14 +++++++
arch/x86/include/asm/fred.h | 18 +++++++++
3 files changed, 105 insertions(+)
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
index d1c2fc4af8ae..f1088d6f2054 100644
--- a/arch/x86/entry/entry_64_fred.S
+++ b/arch/x86/entry/entry_64_fred.S
@@ -4,7 +4,9 @@
*/
#include <asm/asm.h>
+#include <asm/export.h>
#include <asm/fred.h>
+#include <asm/segment.h>
#include "calling.h"
@@ -54,3 +56,74 @@ SYM_CODE_START_NOALIGN(asm_fred_entrypoint_kernel)
FRED_EXIT
ERETS
SYM_CODE_END(asm_fred_entrypoint_kernel)
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+SYM_FUNC_START(asm_fred_entry_from_kvm)
+ push %rbp
+ mov %rsp, %rbp
+
+ UNWIND_HINT_SAVE
+
+ /*
+ * Don't check the FRED stack level, the call stack leading to this
+ * helper is effectively constant and shallow (relatively speaking).
+ *
+ * Emulate the FRED-defined redzone and stack alignment.
+ */
+ sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
+ and $FRED_STACK_FRAME_RSP_MASK, %rsp
+
+ /*
+ * Start to push a FRED stack frame, which is always 64 bytes:
+ *
+ * +--------+-----------------+
+ * | Bytes | Usage |
+ * +--------+-----------------+
+ * | 63:56 | Reserved |
+ * | 55:48 | Event Data |
+ * | 47:40 | SS + Event Info |
+ * | 39:32 | RSP |
+ * | 31:24 | RFLAGS |
+ * | 23:16 | CS + Aux Info |
+ * | 15:8 | RIP |
+ * | 7:0 | Error Code |
+ * +--------+-----------------+
+ */
+ push $0 /* Reserved, must be 0 */
+ push $0 /* Event data, 0 for IRQ/NMI */
+ push %rdi /* fred_ss handed in by the caller */
+ push %rbp
+ pushf
+ mov $__KERNEL_CS, %rax
+ push %rax
+
+ /*
+ * Unlike the IDT event delivery, FRED _always_ pushes an error code
+ * after pushing the return RIP, thus the CALL instruction CANNOT be
+ * used here to push the return RIP, otherwise there is no chance to
+ * push an error code before invoking the IRQ/NMI handler.
+ *
+ * Use LEA to get the return RIP and push it, then push an error code.
+ */
+ lea 1f(%rip), %rax
+ push %rax /* Return RIP */
+ push $0 /* Error code, 0 for IRQ/NMI */
+
+ PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0
+ movq %rsp, %rdi /* %rdi -> pt_regs */
+ call __fred_entry_from_kvm /* Call the C entry point */
+ POP_REGS
+ ERETS
+1:
+ /*
+ * Objtool doesn't understand what ERETS does, this hint tells it that
+ * yes, we'll reach here and with what stack state. A save/restore pair
+ * isn't strictly needed, but it's the simplest form.
+ */
+ UNWIND_HINT_RESTORE
+ pop %rbp
+ RET
+
+SYM_FUNC_END(asm_fred_entry_from_kvm)
+EXPORT_SYMBOL_GPL(asm_fred_entry_from_kvm);
+#endif
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
index 2fd3e421e066..f8774611af80 100644
--- a/arch/x86/entry/entry_fred.c
+++ b/arch/x86/entry/entry_fred.c
@@ -242,3 +242,17 @@ __visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
return fred_bad_type(regs, error_code);
}
}
+
+#if IS_ENABLED(CONFIG_KVM_INTEL)
+__visible noinstr void __fred_entry_from_kvm(struct pt_regs *regs)
+{
+ switch (regs->fred_ss.type) {
+ case EVENT_TYPE_EXTINT:
+ return fred_extint(regs);
+ case EVENT_TYPE_NMI:
+ return fred_exc_nmi(regs);
+ default:
+ WARN_ON_ONCE(1);
+ }
+}
+#endif
diff --git a/arch/x86/include/asm/fred.h b/arch/x86/include/asm/fred.h
index 16a64ffecbf8..2fa9f34e5c95 100644
--- a/arch/x86/include/asm/fred.h
+++ b/arch/x86/include/asm/fred.h
@@ -9,6 +9,7 @@
#include <linux/const.h>
#include <asm/asm.h>
+#include <asm/trapnr.h>
/*
* FRED event return instruction opcodes for ERET{S,U}; supported in
@@ -62,12 +63,29 @@ static __always_inline unsigned long fred_event_data(struct pt_regs *regs)
void asm_fred_entrypoint_user(void);
void asm_fred_entrypoint_kernel(void);
+void asm_fred_entry_from_kvm(struct fred_ss);
__visible void fred_entry_from_user(struct pt_regs *regs);
__visible void fred_entry_from_kernel(struct pt_regs *regs);
+__visible void __fred_entry_from_kvm(struct pt_regs *regs);
+
+/* Can be called from noinstr code, thus __always_inline */
+static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector)
+{
+ struct fred_ss ss = {
+ .ss =__KERNEL_DS,
+ .type = type,
+ .vector = vector,
+ .nmi = type == EVENT_TYPE_NMI,
+ .lm = 1,
+ };
+
+ asm_fred_entry_from_kvm(ss);
+}
#else /* CONFIG_X86_FRED */
static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
+static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
#endif /* CONFIG_X86_FRED */
#endif /* !__ASSEMBLY__ */
--
2.34.1
On 9/14/23 06:48, Xin Li wrote:
> + /*
> + * Don't check the FRED stack level, the call stack leading to this
> + * helper is effectively constant and shallow (relatively speaking).
It's more that we don't need to protect from reentrancy. The external
interrupt uses stack level 0 so no adjustment would be needed anyway,
and NMI does not use an IST even in the non-FRED case.
> + * Emulate the FRED-defined redzone and stack alignment.
> + */
> + sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
> + and $FRED_STACK_FRAME_RSP_MASK, %rsp
> > + /*
> > + * Don't check the FRED stack level, the call stack leading to this
> > + * helper is effectively constant and shallow (relatively speaking).
>
> It's more that we don't need to protect from reentrancy. The external
> interrupt uses stack level 0 so no adjustment would be needed anyway,
> and NMI does not use an IST even in the non-FRED case.
I will incorporate this comment.
I think a VMX NMI is kind of like a user level NMI, and we don't need
to worry about nested NMIs.
>
> > + * Emulate the FRED-defined redzone and stack alignment.
> > + */
> > + sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
> > + and $FRED_STACK_FRAME_RSP_MASK, %rsp
On 14.09.23 г. 7:48 ч., Xin Li wrote:
> In IRQ/NMI induced VM exits, KVM VMX needs to execute the respective
> handlers, which requires the software to create a FRED stack frame,
> and use it to invoke the handlers. Add fred_irq_entry_from_kvm() for
> this job.
>
> Export fred_entry_from_kvm() because VMX can be compiled as a module.
>
> Suggested-by: Sean Christopherson <[email protected]>
> Tested-by: Shan Kang <[email protected]>
> Signed-off-by: Thomas Gleixner <[email protected]>
> Signed-off-by: Xin Li <[email protected]>
> ---
>
> Changes since v9:
> * Shove the whole thing into arch/x86/entry/entry_64_fred.S for invoking
> external_interrupt() and fred_exc_nmi() (Sean Christopherson).
> * Correct and improve a few comments (Sean Christopherson).
> * Merge the two IRQ/NMI asm entries into one as it's fine to invoke
> noinstr code from regular code (Thomas Gleixner).
> * Setup the long mode and NMI flags in the augmented SS field of FRED
> stack frame in C instead of asm (Thomas Gleixner).
> * Add UNWIND_HINT_{SAVE,RESTORE} to get rid of the warning: "objtool:
> asm_fred_entry_from_kvm+0x0: unreachable instruction" (Peter Zijlstra).
>
> Changes since v8:
> * Add a new macro VMX_DO_FRED_EVENT_IRQOFF for FRED instead of
> refactoring VMX_DO_EVENT_IRQOFF (Sean Christopherson).
> * Do NOT use a trampoline, just LEA+PUSH the return RIP, PUSH the error
> code, and jump to the FRED kernel entry point for NMI or call
> external_interrupt() for IRQs (Sean Christopherson).
> * Call external_interrupt() only when FRED is enabled, and convert the
> non-FRED handling to external_interrupt() after FRED lands (Sean
> Christopherson).
> ---
> arch/x86/entry/entry_64_fred.S | 73 ++++++++++++++++++++++++++++++++++
> arch/x86/entry/entry_fred.c | 14 +++++++
> arch/x86/include/asm/fred.h | 18 +++++++++
> 3 files changed, 105 insertions(+)
>
> diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
> index d1c2fc4af8ae..f1088d6f2054 100644
> --- a/arch/x86/entry/entry_64_fred.S
> +++ b/arch/x86/entry/entry_64_fred.S
> @@ -4,7 +4,9 @@
> */
>
> #include <asm/asm.h>
> +#include <asm/export.h>
> #include <asm/fred.h>
> +#include <asm/segment.h>
>
> #include "calling.h"
>
> @@ -54,3 +56,74 @@ SYM_CODE_START_NOALIGN(asm_fred_entrypoint_kernel)
> FRED_EXIT
> ERETS
> SYM_CODE_END(asm_fred_entrypoint_kernel)
> +
> +#if IS_ENABLED(CONFIG_KVM_INTEL)
> +SYM_FUNC_START(asm_fred_entry_from_kvm)
> + push %rbp
> + mov %rsp, %rbp
Use the FRAME_BEGIN/FRAME_END macros to omit this code if
CONFIG_FRAME_POINTER is disabled.
> +
> + UNWIND_HINT_SAVE
> +
> + /*
> + * Don't check the FRED stack level, the call stack leading to this
> + * helper is effectively constant and shallow (relatively speaking).
> + *
> + * Emulate the FRED-defined redzone and stack alignment.
> + */
> + sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
> + and $FRED_STACK_FRAME_RSP_MASK, %rsp
> +
> + /*
> + * Start to push a FRED stack frame, which is always 64 bytes:
> + *
> + * +--------+-----------------+
> + * | Bytes | Usage |
> + * +--------+-----------------+
> + * | 63:56 | Reserved |
> + * | 55:48 | Event Data |
> + * | 47:40 | SS + Event Info |
> + * | 39:32 | RSP |
> + * | 31:24 | RFLAGS |
> + * | 23:16 | CS + Aux Info |
> + * | 15:8 | RIP |
> + * | 7:0 | Error Code |
> + * +--------+-----------------+
> + */
> + push $0 /* Reserved, must be 0 */
> + push $0 /* Event data, 0 for IRQ/NMI */
> + push %rdi /* fred_ss handed in by the caller */
> + push %rbp
> + pushf
> + mov $__KERNEL_CS, %rax
> + push %rax
> +
> + /*
> + * Unlike the IDT event delivery, FRED _always_ pushes an error code
> + * after pushing the return RIP, thus the CALL instruction CANNOT be
> + * used here to push the return RIP, otherwise there is no chance to
> + * push an error code before invoking the IRQ/NMI handler.
> + *
> + * Use LEA to get the return RIP and push it, then push an error code.
> + */
> + lea 1f(%rip), %rax
> + push %rax /* Return RIP */
> + push $0 /* Error code, 0 for IRQ/NMI */
> +
> + PUSH_AND_CLEAR_REGS clear_bp=0 unwind_hint=0
> + movq %rsp, %rdi /* %rdi -> pt_regs */
> + call __fred_entry_from_kvm /* Call the C entry point */
> + POP_REGS
> + ERETS
> +1:
> + /*
> + * Objtool doesn't understand what ERETS does, this hint tells it that
> + * yes, we'll reach here and with what stack state. A save/restore pair
> + * isn't strictly needed, but it's the simplest form.
> + */
> + UNWIND_HINT_RESTORE
> + pop %rbp
FRAME_END
> + RET
> +
> +SYM_FUNC_END(asm_fred_entry_from_kvm)
> +EXPORT_SYMBOL_GPL(asm_fred_entry_from_kvm);
> +#endif
<snip>
On 9/21/23 14:11, Nikolay Borisov wrote:
>>
>> +SYM_FUNC_START(asm_fred_entry_from_kvm)
>> + push %rbp
>> + mov %rsp, %rbp
>
> Use the FRAME_BEGIN/FRAME_END macros to omit this code if
> CONFIG_FRAME_POINTER is disabled.
No, the previous stack pointer is used below, so the code might as well
use %rbp for that; but it must do so unconditionally.
Paolo
>> +
>> + UNWIND_HINT_SAVE
>> +
>> + /*
>> + * Don't check the FRED stack level, the call stack leading to this
>> + * helper is effectively constant and shallow (relatively speaking).
>> + *
>> + * Emulate the FRED-defined redzone and stack alignment.
>> + */
>> + sub $(FRED_CONFIG_REDZONE_AMOUNT << 6), %rsp
>> + and $FRED_STACK_FRAME_RSP_MASK, %rsp
>> +
>> + /*
>> + * Start to push a FRED stack frame, which is always 64 bytes:
>> + *
>> + * +--------+-----------------+
>> + * | Bytes | Usage |
>> + * +--------+-----------------+
>> + * | 63:56 | Reserved |
>> + * | 55:48 | Event Data |
>> + * | 47:40 | SS + Event Info |
>> + * | 39:32 | RSP |
>> + * | 31:24 | RFLAGS |
>> + * | 23:16 | CS + Aux Info |
>> + * | 15:8 | RIP |
>> + * | 7:0 | Error Code |
>> + * +--------+-----------------+
>> + */
>> + push $0 /* Reserved, must be 0 */
>> + push $0 /* Event data, 0 for IRQ/NMI */
>> + push %rdi /* fred_ss handed in by the caller */
>> + push %rbp
^^ here
Paolo
>> + pushf
>> + mov $__KERNEL_CS, %rax
>> + push %rax