When running as Xen pv-guest the exception frame on the stack contains
%r11 and %rcx additional to the other data pushed by the processor.
Instead of having a paravirt op being called for each exception type
prepend the Xen specific code to each exception entry. When running as
Xen pv-guest just use the exception entry with prepended instructions,
otherwise use the entry without the Xen specific code.
Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/entry/entry_64.S | 23 ++------
arch/x86/entry/entry_64_compat.S | 1 -
arch/x86/include/asm/paravirt.h | 5 --
arch/x86/include/asm/paravirt_types.h | 4 --
arch/x86/include/asm/proto.h | 3 ++
arch/x86/include/asm/traps.h | 33 ++++++++++--
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/paravirt.c | 3 --
arch/x86/xen/enlighten_pv.c | 99 +++++++++++++++++++++++------------
arch/x86/xen/irq.c | 3 --
arch/x86/xen/xen-asm_64.S | 45 ++++++++++++++--
arch/x86/xen/xen-ops.h | 1 -
12 files changed, 143 insertions(+), 78 deletions(-)
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index d271fb79248f..67fefaf21312 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -746,7 +746,6 @@ ENTRY(\sym)
.endif
ASM_CLAC
- PARAVIRT_ADJUST_EXCEPTION_FRAME
.ifeq \has_error_code
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -902,7 +901,7 @@ ENTRY(do_softirq_own_stack)
END(do_softirq_own_stack)
#ifdef CONFIG_XEN
-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
/*
* A note on the "critical region" in our callback handler.
@@ -968,8 +967,6 @@ ENTRY(xen_failsafe_callback)
movq 8(%rsp), %r11
addq $0x30, %rsp
pushq $0 /* RIP */
- pushq %r11
- pushq %rcx
jmp general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp), %rcx
@@ -998,9 +995,8 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1
#ifdef CONFIG_XEN
-idtentry xen_debug do_debug has_error_code=0
-idtentry xen_int3 do_int3 has_error_code=0
-idtentry xen_stack_segment do_stack_segment has_error_code=1
+idtentry xendebug do_debug has_error_code=0
+idtentry xenint3 do_int3 has_error_code=0
#endif
idtentry general_protection do_general_protection has_error_code=1
@@ -1160,20 +1156,9 @@ ENTRY(error_exit)
END(error_exit)
/* Runs on exception stack */
+/* XXX: broken on Xen PV */
ENTRY(nmi)
/*
- * Fix up the exception frame if we're on Xen.
- * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
- * one value to the stack on native, so it may clobber the rdx
- * scratch slot, but it won't clobber any of the important
- * slots past it.
- *
- * Xen is a different story, because the Xen frame itself overlaps
- * the "NMI executing" variable.
- */
- PARAVIRT_ADJUST_EXCEPTION_FRAME
-
- /*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
* This means that we can have nested NMIs where the next
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..843d2f08397b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -294,7 +294,6 @@ ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
*/
- PARAVIRT_ADJUST_EXCEPTION_FRAME
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9ccac1926587..c25dd22f7c70 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -960,11 +960,6 @@ extern void default_banner(void);
#define GET_CR2_INTO_RAX \
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
- CLBR_NONE, \
- call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
-
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9ffc36bfe4cd..c55106726938 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -195,10 +195,6 @@ struct pv_irq_ops {
void (*safe_halt)(void);
void (*halt)(void);
-
-#ifdef CONFIG_X86_64
- void (*adjust_exception_frame)(void);
-#endif
} __no_randomize_layout;
struct pv_mmu_ops {
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 8d3964fc5f91..b408b1886195 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+void xen_entry_INT80_compat(void);
+#endif
#endif
void x86_configure_nx(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7f48cd..935709829a4e 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,9 +13,6 @@ asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
-asmlinkage void xen_debug(void);
-asmlinkage void xen_int3(void);
-asmlinkage void xen_stack_segment(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
@@ -38,6 +35,32 @@ asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
asmlinkage void simd_coprocessor_error(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_divide_error(void);
+asmlinkage void xen_xendebug(void);
+asmlinkage void xen_xenint3(void);
+asmlinkage void xen_nmi(void);
+asmlinkage void xen_overflow(void);
+asmlinkage void xen_bounds(void);
+asmlinkage void xen_invalid_op(void);
+asmlinkage void xen_device_not_available(void);
+asmlinkage void xen_double_fault(void);
+asmlinkage void xen_coprocessor_segment_overrun(void);
+asmlinkage void xen_invalid_TSS(void);
+asmlinkage void xen_segment_not_present(void);
+asmlinkage void xen_stack_segment(void);
+asmlinkage void xen_general_protection(void);
+asmlinkage void xen_page_fault(void);
+asmlinkage void xen_async_page_fault(void);
+asmlinkage void xen_spurious_interrupt_bug(void);
+asmlinkage void xen_coprocessor_error(void);
+asmlinkage void xen_alignment_check(void);
+#ifdef CONFIG_X86_MCE
+asmlinkage void xen_machine_check(void);
+#endif /* CONFIG_X86_MCE */
+asmlinkage void xen_simd_coprocessor_error(void);
+#endif
+
#ifdef CONFIG_TRACING
asmlinkage void trace_page_fault(void);
#define trace_stack_segment stack_segment
@@ -54,6 +77,10 @@ asmlinkage void trace_page_fault(void);
#define trace_alignment_check alignment_check
#define trace_simd_coprocessor_error simd_coprocessor_error
#define trace_async_page_fault async_page_fault
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_trace_page_fault(void);
+#endif
#endif
dotraplinkage void do_divide_error(struct pt_regs *, long);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 99332f550c48..cf42206926af 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
int main(void)
{
#ifdef CONFIG_PARAVIRT
- OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bc0a849589bb..a14df9eecfed 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.safe_halt = native_safe_halt,
.halt = native_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = paravirt_nop,
-#endif
};
__visible struct pv_cpu_ops pv_cpu_ops = {
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4ddb3f37..a3dcd83187ce 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -579,6 +579,71 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
preempt_enable();
}
+#ifdef CONFIG_X86_64
+static struct {
+ void (*orig)(void);
+ void (*xen)(void);
+ bool ist_okay;
+ bool handle;
+} trap_array[] = {
+ { debug, xen_xendebug, true, true },
+ { int3, xen_xenint3, true, true },
+ { double_fault, xen_double_fault, true, false },
+#ifdef CONFIG_X86_MCE
+ { machine_check, xen_machine_check, true, true },
+#endif
+ { nmi, xen_nmi, true, true },
+ { overflow, xen_overflow, false, true },
+#ifdef CONFIG_IA32_EMULATION
+ { entry_INT80_compat, xen_entry_INT80_compat, false, true },
+#endif
+ { page_fault, xen_page_fault, false, true },
+ { divide_error, xen_divide_error, false, true },
+ { bounds, xen_bounds, false, true },
+ { invalid_op, xen_invalid_op, false, true },
+ { device_not_available, xen_device_not_available, false, true },
+ { coprocessor_segment_overrun, xen_coprocessor_segment_overrun,
+ false, true },
+ { invalid_TSS, xen_invalid_TSS, false, true },
+ { segment_not_present, xen_segment_not_present, false, true },
+ { stack_segment, xen_stack_segment, false, true },
+ { general_protection, xen_general_protection, false, true },
+ { spurious_interrupt_bug, xen_spurious_interrupt_bug, false, true },
+ { coprocessor_error, xen_coprocessor_error, false, true },
+ { alignment_check, xen_alignment_check, false, true },
+ { simd_coprocessor_error, xen_simd_coprocessor_error, false, true },
+#ifdef CONFIG_TRACING
+ { trace_page_fault, xen_trace_page_fault, false, true },
+#endif
+};
+
+static bool get_trap_addr(unsigned long *addr, unsigned int ist)
+{
+ unsigned int nr;
+ bool handle = true, ist_okay = false;
+
+ /*
+ * Replace trap handler addresses by Xen specific ones.
+ * Check for known traps using IST and whitelist them.
+ * The debugger ones are the only ones we care about.
+ * Xen will handle faults like double_fault, * so we should never see
+ * them. Warn if there's an unexpected IST-using fault handler.
+ */
+ for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++)
+ if (*addr == (unsigned long)trap_array[nr].orig) {
+ *addr = (unsigned long)trap_array[nr].xen;
+ ist_okay = trap_array[nr].ist_okay;
+ handle = trap_array[nr].handle;
+ break;
+ }
+
+ if (WARN_ON(ist != 0 && !ist_okay))
+ handle = false;
+
+ return handle;
+}
+#endif
+
static int cvt_gate_to_trap(int vector, const gate_desc *val,
struct trap_info *info)
{
@@ -591,40 +656,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
addr = gate_offset(*val);
#ifdef CONFIG_X86_64
- /*
- * Look for known traps using IST, and substitute them
- * appropriately. The debugger ones are the only ones we care
- * about. Xen will handle faults like double_fault,
- * so we should never see them. Warn if
- * there's an unexpected IST-using fault handler.
- */
- if (addr == (unsigned long)debug)
- addr = (unsigned long)xen_debug;
- else if (addr == (unsigned long)int3)
- addr = (unsigned long)xen_int3;
- else if (addr == (unsigned long)stack_segment)
- addr = (unsigned long)xen_stack_segment;
- else if (addr == (unsigned long)double_fault) {
- /* Don't need to handle these */
+ if (!get_trap_addr(&addr, val->ist))
return 0;
-#ifdef CONFIG_X86_MCE
- } else if (addr == (unsigned long)machine_check) {
- /*
- * when xen hypervisor inject vMCE to guest,
- * use native mce handler to handle it
- */
- ;
-#endif
- } else if (addr == (unsigned long)nmi)
- /*
- * Use the native version as well.
- */
- ;
- else {
- /* Some other trap using IST? */
- if (WARN_ON(val->ist != 0))
- return 0;
- }
#endif /* CONFIG_X86_64 */
info->address = addr;
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 33e92955e09d..d4eff5676cfa 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
.safe_halt = xen_safe_halt,
.halt = xen_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = xen_adjust_exception_frame,
-#endif
};
void __init xen_init_irq_ops(void)
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..f72ff71cc897 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -22,11 +22,46 @@
#include "xen-asm.h"
-ENTRY(xen_adjust_exception_frame)
- mov 8+0(%rsp), %rcx
- mov 8+8(%rsp), %r11
- ret $16
-ENDPROC(xen_adjust_exception_frame)
+.macro xen_pv_trap name
+ENTRY(xen_\name)
+ pop %rcx
+ pop %r11
+ jmp \name
+END(xen_\name)
+.endm
+
+xen_pv_trap divide_error
+xen_pv_trap debug
+xen_pv_trap xendebug
+xen_pv_trap int3
+xen_pv_trap xenint3
+xen_pv_trap nmi
+xen_pv_trap overflow
+xen_pv_trap bounds
+xen_pv_trap invalid_op
+xen_pv_trap device_not_available
+xen_pv_trap double_fault
+xen_pv_trap coprocessor_segment_overrun
+xen_pv_trap invalid_TSS
+xen_pv_trap segment_not_present
+xen_pv_trap stack_segment
+xen_pv_trap general_protection
+xen_pv_trap page_fault
+xen_pv_trap async_page_fault
+xen_pv_trap spurious_interrupt_bug
+xen_pv_trap coprocessor_error
+xen_pv_trap alignment_check
+#ifdef CONFIG_X86_MCE
+xen_pv_trap machine_check
+#endif /* CONFIG_X86_MCE */
+xen_pv_trap simd_coprocessor_error
+#ifdef CONFIG_IA32_EMULATION
+xen_pv_trap entry_INT80_compat
+#endif
+#ifdef CONFIG_TRACING
+xen_pv_trap trace_page_fault
+#endif
+xen_pv_trap hypervisor_callback
hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d5004477db6..18e8729d42bd 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -145,7 +145,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
__visible void xen_iret(void);
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
-__visible void xen_adjust_exception_frame(void);
extern int xen_panic_handler_init(void);
--
2.12.3
> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index 811e4ddb3f37..a3dcd83187ce 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -579,6 +579,71 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
> preempt_enable();
> }
>
> +#ifdef CONFIG_X86_64
> +static struct {
> + void (*orig)(void);
> + void (*xen)(void);
> + bool ist_okay;
> + bool handle;
> +} trap_array[] = {
> + { debug, xen_xendebug, true, true },
> + { int3, xen_xenint3, true, true },
> + { double_fault, xen_double_fault, true, false },
Is it really worth adding 'handle' member to the structure because of a
single special case? We don't expect to ever have another such vector.
(TBH, I think current implementation of cvt_gate_to_trap() is clearer,
even if it is not as general as what is in this patch. I know that Andy
disagrees).
-boris
> +#ifdef CONFIG_X86_MCE
> + { machine_check, xen_machine_check, true, true },
> +#endif
> + { nmi, xen_nmi, true, true },
> + { overflow, xen_overflow, false, true },
> +#ifdef CONFIG_IA32_EMULATION
> + { entry_INT80_compat, xen_entry_INT80_compat, false, true },
> +#endif
> + { page_fault, xen_page_fault, false, true },
> + { divide_error, xen_divide_error, false, true },
> + { bounds, xen_bounds, false, true },
> + { invalid_op, xen_invalid_op, false, true },
> + { device_not_available, xen_device_not_available, false, true },
> + { coprocessor_segment_overrun, xen_coprocessor_segment_overrun,
> + false, true },
> + { invalid_TSS, xen_invalid_TSS, false, true },
> + { segment_not_present, xen_segment_not_present, false, true },
> + { stack_segment, xen_stack_segment, false, true },
> + { general_protection, xen_general_protection, false, true },
> + { spurious_interrupt_bug, xen_spurious_interrupt_bug, false, true },
> + { coprocessor_error, xen_coprocessor_error, false, true },
> + { alignment_check, xen_alignment_check, false, true },
> + { simd_coprocessor_error, xen_simd_coprocessor_error, false, true },
> +#ifdef CONFIG_TRACING
> + { trace_page_fault, xen_trace_page_fault, false, true },
> +#endif
> +};
> +
> +static bool get_trap_addr(unsigned long *addr, unsigned int ist)
> +{
> + unsigned int nr;
> + bool handle = true, ist_okay = false;
> +
> + /*
> + * Replace trap handler addresses by Xen specific ones.
> + * Check for known traps using IST and whitelist them.
> + * The debugger ones are the only ones we care about.
> + * Xen will handle faults like double_fault, * so we should never see
> + * them. Warn if there's an unexpected IST-using fault handler.
> + */
> + for (nr = 0; nr < ARRAY_SIZE(trap_array); nr++)
> + if (*addr == (unsigned long)trap_array[nr].orig) {
> + *addr = (unsigned long)trap_array[nr].xen;
> + ist_okay = trap_array[nr].ist_okay;
> + handle = trap_array[nr].handle;
> + break;
> + }
> +
> + if (WARN_ON(ist != 0 && !ist_okay))
> + handle = false;
> +
> + return handle;
> +}
> +#endif
> +
> static int cvt_gate_to_trap(int vector, const gate_desc *val,
> struct trap_info *info)
> {
> @@ -591,40 +656,8 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
>
> addr = gate_offset(*val);
> #ifdef CONFIG_X86_64
> - /*
> - * Look for known traps using IST, and substitute them
> - * appropriately. The debugger ones are the only ones we care
> - * about. Xen will handle faults like double_fault,
> - * so we should never see them. Warn if
> - * there's an unexpected IST-using fault handler.
> - */
> - if (addr == (unsigned long)debug)
> - addr = (unsigned long)xen_debug;
> - else if (addr == (unsigned long)int3)
> - addr = (unsigned long)xen_int3;
> - else if (addr == (unsigned long)stack_segment)
> - addr = (unsigned long)xen_stack_segment;
> - else if (addr == (unsigned long)double_fault) {
> - /* Don't need to handle these */
> + if (!get_trap_addr(&addr, val->ist))
> return 0;
> -#ifdef CONFIG_X86_MCE
> - } else if (addr == (unsigned long)machine_check) {
> - /*
> - * when xen hypervisor inject vMCE to guest,
> - * use native mce handler to handle it
> - */
> - ;
> -#endif
> - } else if (addr == (unsigned long)nmi)
> - /*
> - * Use the native version as well.
> - */
> - ;
> - else {
> - /* Some other trap using IST? */
> - if (WARN_ON(val->ist != 0))
> - return 0;
> - }
> #endif /* CONFIG_X86_64 */
> info->address = addr;
>
On Mon, Aug 7, 2017 at 1:56 PM, Boris Ostrovsky
<[email protected]> wrote:
>
>> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
>> index 811e4ddb3f37..a3dcd83187ce 100644
>> --- a/arch/x86/xen/enlighten_pv.c
>> +++ b/arch/x86/xen/enlighten_pv.c
>> @@ -579,6 +579,71 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
>> preempt_enable();
>> }
>>
>> +#ifdef CONFIG_X86_64
>> +static struct {
>> + void (*orig)(void);
>> + void (*xen)(void);
>> + bool ist_okay;
>> + bool handle;
>> +} trap_array[] = {
>> + { debug, xen_xendebug, true, true },
>> + { int3, xen_xenint3, true, true },
>> + { double_fault, xen_double_fault, true, false },
>
> Is it really worth adding 'handle' member to the structure because of a
> single special case? We don't expect to ever have another such vector.
>
> (TBH, I think current implementation of cvt_gate_to_trap() is clearer,
> even if it is not as general as what is in this patch. I know that Andy
> disagrees).
I have no real opinion either way. I just think it's nicer to put it
in cvt_gate_to_trap() instead of the the traps.c setup code.
--Andy
On 07/08/17 22:56, Boris Ostrovsky wrote:
>
>> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
>> index 811e4ddb3f37..a3dcd83187ce 100644
>> --- a/arch/x86/xen/enlighten_pv.c
>> +++ b/arch/x86/xen/enlighten_pv.c
>> @@ -579,6 +579,71 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
>> preempt_enable();
>> }
>>
>> +#ifdef CONFIG_X86_64
>> +static struct {
>> + void (*orig)(void);
>> + void (*xen)(void);
>> + bool ist_okay;
>> + bool handle;
>> +} trap_array[] = {
>> + { debug, xen_xendebug, true, true },
>> + { int3, xen_xenint3, true, true },
>> + { double_fault, xen_double_fault, true, false },
>
> Is it really worth adding 'handle' member to the structure because of a
> single special case? We don't expect to ever have another such vector.
Hmm, maybe you are right. We don't expect to ever see a double_fault in
a pv domain, so we could just drop that special case by handling it like
the other IST traps.
> (TBH, I think current implementation of cvt_gate_to_trap() is clearer,
> even if it is not as general as what is in this patch. I know that Andy
> disagrees).
I think being able to concentrate as much pv interface stuff as possible
to Xen specific sources is a win.
The less Xen modifications are needed in non-Xen sources the better.
Juergen
On 08/08/17 08:02, Juergen Gross wrote:
> On 07/08/17 22:56, Boris Ostrovsky wrote:
>>> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
>>> index 811e4ddb3f37..a3dcd83187ce 100644
>>> --- a/arch/x86/xen/enlighten_pv.c
>>> +++ b/arch/x86/xen/enlighten_pv.c
>>> @@ -579,6 +579,71 @@ static void xen_write_ldt_entry(struct desc_struct *dt, int entrynum,
>>> preempt_enable();
>>> }
>>>
>>> +#ifdef CONFIG_X86_64
>>> +static struct {
>>> + void (*orig)(void);
>>> + void (*xen)(void);
>>> + bool ist_okay;
>>> + bool handle;
>>> +} trap_array[] = {
>>> + { debug, xen_xendebug, true, true },
>>> + { int3, xen_xenint3, true, true },
>>> + { double_fault, xen_double_fault, true, false },
>> Is it really worth adding 'handle' member to the structure because of a
>> single special case? We don't expect to ever have another such vector.
> Hmm, maybe you are right. We don't expect to ever see a double_fault in
> a pv domain, so we could just drop that special case by handling it like
> the other IST traps.
(This is steeped in a lot of history.) There is no path where Xen will
raise #DF with a PV guest.
As Linux sets the DPL of the #DF handler to 0, the `int $8` emulation
will inject #GP (even for kernel uses!) rather than follow the #DF path.
I do however want to see about making Xen's behaviour rather more
architectural, e.g. raising #NP rather than crashing the guest. If we
do get to a position of properly using contributory exceptions, #DF will
behave as #MC and #NMI currently do, by either switching from userspace
to the kernel stack, or pushed normally onto the current stack.
In some copious free time, it might also be good to invent a PV ABI to
behave like IST (even if only for the failsafe callback) so Linux can
actually recover sufficiently from a stack overflow to print some
diagnostics.
~Andrew