2017-08-01 10:40:12

by Jürgen Groß

[permalink] [raw]
Subject: [PATCH v2] xen: get rid of paravirt op adjust_exception_frame

When running as Xen pv-guest the exception frame on the stack contains
%r11 and %rcx additional to the other data pushed by the processor.

Instead of having a paravirt op being called for each exception type
prepend the Xen specific code to each exception entry. When running as
Xen pv-guest just use the exception entry with prepended instructions,
otherwise use the entry without the Xen specific code.

Signed-off-by: Juergen Gross <[email protected]>
---
arch/x86/entry/entry_64.S | 22 ++------------
arch/x86/entry/entry_64_compat.S | 1 -
arch/x86/include/asm/desc.h | 16 ++++++++++
arch/x86/include/asm/paravirt.h | 5 ---
arch/x86/include/asm/paravirt_types.h | 4 ---
arch/x86/include/asm/proto.h | 3 ++
arch/x86/include/asm/traps.h | 51 +++++++++++++++++++++++++++++--
arch/x86/kernel/asm-offsets_64.c | 1 -
arch/x86/kernel/paravirt.c | 3 --
arch/x86/kernel/traps.c | 57 ++++++++++++++++++-----------------
arch/x86/xen/enlighten_pv.c | 16 +++++-----
arch/x86/xen/irq.c | 3 --
arch/x86/xen/xen-asm_64.S | 45 ++++++++++++++++++++++++---
arch/x86/xen/xen-ops.h | 1 -
14 files changed, 147 insertions(+), 81 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a9a8027a6c0e..602bcf68a32c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -745,7 +745,6 @@ ENTRY(\sym)
.endif

ASM_CLAC
- PARAVIRT_ADJUST_EXCEPTION_FRAME

.ifeq \has_error_code
pushq $-1 /* ORIG_RAX: no syscall to restart */
@@ -901,7 +900,7 @@ ENTRY(do_softirq_own_stack)
END(do_softirq_own_stack)

#ifdef CONFIG_XEN
-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0

/*
* A note on the "critical region" in our callback handler.
@@ -967,8 +966,6 @@ ENTRY(xen_failsafe_callback)
movq 8(%rsp), %r11
addq $0x30, %rsp
pushq $0 /* RIP */
- pushq %r11
- pushq %rcx
jmp general_protection
1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
movq (%rsp), %rcx
@@ -997,9 +994,8 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
idtentry stack_segment do_stack_segment has_error_code=1

#ifdef CONFIG_XEN
-idtentry xen_debug do_debug has_error_code=0
-idtentry xen_int3 do_int3 has_error_code=0
-idtentry xen_stack_segment do_stack_segment has_error_code=1
+idtentry xendebug do_debug has_error_code=0
+idtentry xenint3 do_int3 has_error_code=0
#endif

idtentry general_protection do_general_protection has_error_code=1
@@ -1161,18 +1157,6 @@ END(error_exit)
/* Runs on exception stack */
ENTRY(nmi)
/*
- * Fix up the exception frame if we're on Xen.
- * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
- * one value to the stack on native, so it may clobber the rdx
- * scratch slot, but it won't clobber any of the important
- * slots past it.
- *
- * Xen is a different story, because the Xen frame itself overlaps
- * the "NMI executing" variable.
- */
- PARAVIRT_ADJUST_EXCEPTION_FRAME
-
- /*
* We allow breakpoints in NMIs. If a breakpoint occurs, then
* the iretq it performs will take us out of NMI context.
* This means that we can have nested NMIs where the next
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..843d2f08397b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -294,7 +294,6 @@ ENTRY(entry_INT80_compat)
/*
* Interrupts are off on entry.
*/
- PARAVIRT_ADJUST_EXCEPTION_FRAME
ASM_CLAC /* Do this early to minimize exposure */
SWAPGS

diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d0a21b12dd58..f581e61ffcaa 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -9,6 +9,8 @@
#include <linux/smp.h>
#include <linux/percpu.h>

+#include <xen/xen.h>
+
static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
{
desc->limit0 = info->limit & 0x0ffff;
@@ -83,6 +85,12 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
}

+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#define pv_trap_entry(name) (void *)(xen_pv_domain() ? xen_ ## name : name)
+#else
+#define pv_trap_entry(name) (void *)(name)
+#endif
+
#ifdef CONFIG_X86_64

static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -482,6 +490,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
0, 0, __KERNEL_CS); \
} while (0)

+#define set_intr_gate_pv(n, addr) \
+ do { \
+ set_intr_gate_notrace(n, pv_trap_entry(addr)); \
+ _trace_set_gate(n, GATE_INTERRUPT, \
+ pv_trap_entry(trace_##addr), \
+ 0, 0, __KERNEL_CS); \
+ } while (0)
+
extern int first_system_vector;
/* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
extern unsigned long used_vectors[];
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9ccac1926587..c25dd22f7c70 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -960,11 +960,6 @@ extern void default_banner(void);
#define GET_CR2_INTO_RAX \
call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)

-#define PARAVIRT_ADJUST_EXCEPTION_FRAME \
- PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
- CLBR_NONE, \
- call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
-
#define USERGS_SYSRET64 \
PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64), \
CLBR_NONE, \
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9ffc36bfe4cd..c55106726938 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -195,10 +195,6 @@ struct pv_irq_ops {

void (*safe_halt)(void);
void (*halt)(void);
-
-#ifdef CONFIG_X86_64
- void (*adjust_exception_frame)(void);
-#endif
} __no_randomize_layout;

struct pv_mmu_ops {
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 8d3964fc5f91..b408b1886195 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
void __end_entry_SYSENTER_compat(void);
void entry_SYSCALL_compat(void);
void entry_INT80_compat(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+void xen_entry_INT80_compat(void);
+#endif
#endif

void x86_configure_nx(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7f48cd..e2ae5f3b9c2c 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,9 +13,8 @@ asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
-asmlinkage void xen_debug(void);
-asmlinkage void xen_int3(void);
-asmlinkage void xen_stack_segment(void);
+asmlinkage void xendebug(void);
+asmlinkage void xenint3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
@@ -38,6 +37,34 @@ asmlinkage void machine_check(void);
#endif /* CONFIG_X86_MCE */
asmlinkage void simd_coprocessor_error(void);

+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_divide_error(void);
+asmlinkage void xen_debug(void);
+asmlinkage void xen_xendebug(void);
+asmlinkage void xen_int3(void);
+asmlinkage void xen_xenint3(void);
+asmlinkage void xen_nmi(void);
+asmlinkage void xen_overflow(void);
+asmlinkage void xen_bounds(void);
+asmlinkage void xen_invalid_op(void);
+asmlinkage void xen_device_not_available(void);
+asmlinkage void xen_double_fault(void);
+asmlinkage void xen_coprocessor_segment_overrun(void);
+asmlinkage void xen_invalid_TSS(void);
+asmlinkage void xen_segment_not_present(void);
+asmlinkage void xen_stack_segment(void);
+asmlinkage void xen_general_protection(void);
+asmlinkage void xen_page_fault(void);
+asmlinkage void xen_async_page_fault(void);
+asmlinkage void xen_spurious_interrupt_bug(void);
+asmlinkage void xen_coprocessor_error(void);
+asmlinkage void xen_alignment_check(void);
+#ifdef CONFIG_X86_MCE
+asmlinkage void xen_machine_check(void);
+#endif /* CONFIG_X86_MCE */
+asmlinkage void xen_simd_coprocessor_error(void);
+#endif
+
#ifdef CONFIG_TRACING
asmlinkage void trace_page_fault(void);
#define trace_stack_segment stack_segment
@@ -54,6 +81,24 @@ asmlinkage void trace_page_fault(void);
#define trace_alignment_check alignment_check
#define trace_simd_coprocessor_error simd_coprocessor_error
#define trace_async_page_fault async_page_fault
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_trace_page_fault(void);
+#define xen_trace_stack_segment xen_stack_segment
+#define xen_trace_divide_error xen_divide_error
+#define xen_trace_bounds xen_bounds
+#define xen_trace_invalid_op xen_invalid_op
+#define xen_trace_device_not_available xen_device_not_available
+#define xen_trace_coprocessor_segment_overrun xen_coprocessor_segment_overrun
+#define xen_trace_invalid_TSS xen_invalid_TSS
+#define xen_trace_segment_not_present xen_segment_not_present
+#define xen_trace_general_protection xen_general_protection
+#define xen_trace_spurious_interrupt_bug xen_spurious_interrupt_bug
+#define xen_trace_coprocessor_error xen_coprocessor_error
+#define xen_trace_alignment_check xen_alignment_check
+#define xen_trace_simd_coprocessor_error xen_simd_coprocessor_error
+#define xen_trace_async_page_fault xen_async_page_fault
+#endif
#endif

dotraplinkage void do_divide_error(struct pt_regs *, long);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 99332f550c48..cf42206926af 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
int main(void)
{
#ifdef CONFIG_PARAVIRT
- OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
BLANK();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bc0a849589bb..a14df9eecfed 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
.safe_halt = native_safe_halt,
.halt = native_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = paravirt_nop,
-#endif
};

__visible struct pv_cpu_ops pv_cpu_ops = {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bf54309b85da..a79a97a46a59 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -946,15 +946,15 @@ void __init early_trap_init(void)
* early stage. DEBUG_STACK will be equipped after cpu_init() in
* trap_init().
*
- * We don't need to set trace_idt_table like set_intr_gate(),
+ * We don't need to set trace_idt_table like set_intr_gate_pv(),
* since we don't have trace_debug and it will be reset to
* 'debug' in trap_init() by set_intr_gate_ist().
*/
set_intr_gate_notrace(X86_TRAP_DB, debug);
/* int3 can be called from all */
- set_system_intr_gate(X86_TRAP_BP, &int3);
+ set_system_intr_gate(X86_TRAP_BP, pv_trap_entry(int3));
#ifdef CONFIG_X86_32
- set_intr_gate(X86_TRAP_PF, page_fault);
+ set_intr_gate_pv(X86_TRAP_PF, page_fault);
#endif
load_idt(&idt_descr);
}
@@ -962,7 +962,7 @@ void __init early_trap_init(void)
void __init early_trap_pf_init(void)
{
#ifdef CONFIG_X86_64
- set_intr_gate(X86_TRAP_PF, page_fault);
+ set_intr_gate_pv(X86_TRAP_PF, page_fault);
#endif
}

@@ -978,42 +978,45 @@ void __init trap_init(void)
early_iounmap(p, 4);
#endif

- set_intr_gate(X86_TRAP_DE, divide_error);
- set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
+ set_intr_gate_pv(X86_TRAP_DE, divide_error);
+ set_intr_gate_ist(X86_TRAP_NMI, pv_trap_entry(nmi), NMI_STACK);
/* int4 can be called from all */
- set_system_intr_gate(X86_TRAP_OF, &overflow);
- set_intr_gate(X86_TRAP_BR, bounds);
- set_intr_gate(X86_TRAP_UD, invalid_op);
- set_intr_gate(X86_TRAP_NM, device_not_available);
+ set_system_intr_gate(X86_TRAP_OF, pv_trap_entry(overflow));
+ set_intr_gate_pv(X86_TRAP_BR, bounds);
+ set_intr_gate_pv(X86_TRAP_UD, invalid_op);
+ set_intr_gate_pv(X86_TRAP_NM, device_not_available);
#ifdef CONFIG_X86_32
set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
#else
- set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
+ set_intr_gate_ist(X86_TRAP_DF, pv_trap_entry(double_fault),
+ DOUBLEFAULT_STACK);
#endif
- set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
- set_intr_gate(X86_TRAP_TS, invalid_TSS);
- set_intr_gate(X86_TRAP_NP, segment_not_present);
- set_intr_gate(X86_TRAP_SS, stack_segment);
- set_intr_gate(X86_TRAP_GP, general_protection);
- set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
- set_intr_gate(X86_TRAP_MF, coprocessor_error);
- set_intr_gate(X86_TRAP_AC, alignment_check);
+ set_intr_gate_pv(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
+ set_intr_gate_pv(X86_TRAP_TS, invalid_TSS);
+ set_intr_gate_pv(X86_TRAP_NP, segment_not_present);
+ set_intr_gate_pv(X86_TRAP_SS, stack_segment);
+ set_intr_gate_pv(X86_TRAP_GP, general_protection);
+ set_intr_gate_pv(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
+ set_intr_gate_pv(X86_TRAP_MF, coprocessor_error);
+ set_intr_gate_pv(X86_TRAP_AC, alignment_check);
#ifdef CONFIG_X86_MCE
- set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
+ set_intr_gate_ist(X86_TRAP_MC, pv_trap_entry(machine_check), MCE_STACK);
#endif
- set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);
+ set_intr_gate_pv(X86_TRAP_XF, simd_coprocessor_error);

/* Reserve all the builtin and the syscall vector: */
for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
set_bit(i, used_vectors);

#ifdef CONFIG_IA32_EMULATION
- set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
+ set_system_intr_gate(IA32_SYSCALL_VECTOR,
+ pv_trap_entry(entry_INT80_compat));
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif

#ifdef CONFIG_X86_32
- set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+ set_system_intr_gate(IA32_SYSCALL_VECTOR,
+ pv_trap_entry(entry_INT80_32));
set_bit(IA32_SYSCALL_VECTOR, used_vectors);
#endif

@@ -1035,15 +1038,15 @@ void __init trap_init(void)
* in early_trap_init(). However, ITS works only after
* cpu_init() loads TSS. See comments in early_trap_init().
*/
- set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+ set_intr_gate_ist(X86_TRAP_DB, pv_trap_entry(debug), DEBUG_STACK);
/* int3 can be called from all */
- set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+ set_system_intr_gate_ist(X86_TRAP_BP, pv_trap_entry(int3), DEBUG_STACK);

x86_init.irqs.trap_init();

#ifdef CONFIG_X86_64
memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
- set_nmi_gate(X86_TRAP_DB, &debug);
- set_nmi_gate(X86_TRAP_BP, &int3);
+ set_nmi_gate(X86_TRAP_DB, pv_trap_entry(debug));
+ set_nmi_gate(X86_TRAP_BP, pv_trap_entry(int3));
#endif
}
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4ddb3f37..7e107142bc4f 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -598,24 +598,22 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
* so we should never see them. Warn if
* there's an unexpected IST-using fault handler.
*/
- if (addr == (unsigned long)debug)
- addr = (unsigned long)xen_debug;
- else if (addr == (unsigned long)int3)
- addr = (unsigned long)xen_int3;
- else if (addr == (unsigned long)stack_segment)
- addr = (unsigned long)xen_stack_segment;
- else if (addr == (unsigned long)double_fault) {
+ if (addr == (unsigned long)xen_debug)
+ addr = (unsigned long)xen_xendebug;
+ else if (addr == (unsigned long)xen_int3)
+ addr = (unsigned long)xen_xenint3;
+ else if (addr == (unsigned long)xen_double_fault) {
/* Don't need to handle these */
return 0;
#ifdef CONFIG_X86_MCE
- } else if (addr == (unsigned long)machine_check) {
+ } else if (addr == (unsigned long)xen_machine_check) {
/*
* when xen hypervisor inject vMCE to guest,
* use native mce handler to handle it
*/
;
#endif
- } else if (addr == (unsigned long)nmi)
+ } else if (addr == (unsigned long)xen_nmi)
/*
* Use the native version as well.
*/
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 33e92955e09d..d4eff5676cfa 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {

.safe_halt = xen_safe_halt,
.halt = xen_halt,
-#ifdef CONFIG_X86_64
- .adjust_exception_frame = xen_adjust_exception_frame,
-#endif
};

void __init xen_init_irq_ops(void)
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..f72ff71cc897 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -22,11 +22,46 @@

#include "xen-asm.h"

-ENTRY(xen_adjust_exception_frame)
- mov 8+0(%rsp), %rcx
- mov 8+8(%rsp), %r11
- ret $16
-ENDPROC(xen_adjust_exception_frame)
+.macro xen_pv_trap name
+ENTRY(xen_\name)
+ pop %rcx
+ pop %r11
+ jmp \name
+END(xen_\name)
+.endm
+
+xen_pv_trap divide_error
+xen_pv_trap debug
+xen_pv_trap xendebug
+xen_pv_trap int3
+xen_pv_trap xenint3
+xen_pv_trap nmi
+xen_pv_trap overflow
+xen_pv_trap bounds
+xen_pv_trap invalid_op
+xen_pv_trap device_not_available
+xen_pv_trap double_fault
+xen_pv_trap coprocessor_segment_overrun
+xen_pv_trap invalid_TSS
+xen_pv_trap segment_not_present
+xen_pv_trap stack_segment
+xen_pv_trap general_protection
+xen_pv_trap page_fault
+xen_pv_trap async_page_fault
+xen_pv_trap spurious_interrupt_bug
+xen_pv_trap coprocessor_error
+xen_pv_trap alignment_check
+#ifdef CONFIG_X86_MCE
+xen_pv_trap machine_check
+#endif /* CONFIG_X86_MCE */
+xen_pv_trap simd_coprocessor_error
+#ifdef CONFIG_IA32_EMULATION
+xen_pv_trap entry_INT80_compat
+#endif
+#ifdef CONFIG_TRACING
+xen_pv_trap trace_page_fault
+#endif
+xen_pv_trap hypervisor_callback

hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
/*
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d5004477db6..18e8729d42bd 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -145,7 +145,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
__visible void xen_iret(void);
__visible void xen_sysret32(void);
__visible void xen_sysret64(void);
-__visible void xen_adjust_exception_frame(void);

extern int xen_panic_handler_init(void);

--
2.12.3


2017-08-01 19:45:41

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [PATCH v2] xen: get rid of paravirt op adjust_exception_frame

On Tue, Aug 1, 2017 at 3:39 AM, Juergen Gross <[email protected]> wrote:
> When running as Xen pv-guest the exception frame on the stack contains
> %r11 and %rcx additional to the other data pushed by the processor.
>
> Instead of having a paravirt op being called for each exception type
> prepend the Xen specific code to each exception entry. When running as
> Xen pv-guest just use the exception entry with prepended instructions,
> otherwise use the entry without the Xen specific code.
>
> Signed-off-by: Juergen Gross <[email protected]>
> ---
> arch/x86/entry/entry_64.S | 22 ++------------
> arch/x86/entry/entry_64_compat.S | 1 -
> arch/x86/include/asm/desc.h | 16 ++++++++++
> arch/x86/include/asm/paravirt.h | 5 ---
> arch/x86/include/asm/paravirt_types.h | 4 ---
> arch/x86/include/asm/proto.h | 3 ++
> arch/x86/include/asm/traps.h | 51 +++++++++++++++++++++++++++++--
> arch/x86/kernel/asm-offsets_64.c | 1 -
> arch/x86/kernel/paravirt.c | 3 --
> arch/x86/kernel/traps.c | 57 ++++++++++++++++++-----------------
> arch/x86/xen/enlighten_pv.c | 16 +++++-----
> arch/x86/xen/irq.c | 3 --
> arch/x86/xen/xen-asm_64.S | 45 ++++++++++++++++++++++++---
> arch/x86/xen/xen-ops.h | 1 -
> 14 files changed, 147 insertions(+), 81 deletions(-)
>
> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
> index a9a8027a6c0e..602bcf68a32c 100644
> --- a/arch/x86/entry/entry_64.S
> +++ b/arch/x86/entry/entry_64.S
> @@ -745,7 +745,6 @@ ENTRY(\sym)
> .endif
>
> ASM_CLAC
> - PARAVIRT_ADJUST_EXCEPTION_FRAME
>
> .ifeq \has_error_code
> pushq $-1 /* ORIG_RAX: no syscall to restart */
> @@ -901,7 +900,7 @@ ENTRY(do_softirq_own_stack)
> END(do_softirq_own_stack)
>
> #ifdef CONFIG_XEN
> -idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
> +idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
>
> /*
> * A note on the "critical region" in our callback handler.
> @@ -967,8 +966,6 @@ ENTRY(xen_failsafe_callback)
> movq 8(%rsp), %r11
> addq $0x30, %rsp
> pushq $0 /* RIP */
> - pushq %r11
> - pushq %rcx
> jmp general_protection
> 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
> movq (%rsp), %rcx
> @@ -997,9 +994,8 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
> idtentry stack_segment do_stack_segment has_error_code=1
>
> #ifdef CONFIG_XEN
> -idtentry xen_debug do_debug has_error_code=0
> -idtentry xen_int3 do_int3 has_error_code=0
> -idtentry xen_stack_segment do_stack_segment has_error_code=1
> +idtentry xendebug do_debug has_error_code=0
> +idtentry xenint3 do_int3 has_error_code=0
> #endif
>
> idtentry general_protection do_general_protection has_error_code=1
> @@ -1161,18 +1157,6 @@ END(error_exit)
> /* Runs on exception stack */
> ENTRY(nmi)
> /*
> - * Fix up the exception frame if we're on Xen.
> - * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
> - * one value to the stack on native, so it may clobber the rdx
> - * scratch slot, but it won't clobber any of the important
> - * slots past it.
> - *
> - * Xen is a different story, because the Xen frame itself overlaps
> - * the "NMI executing" variable.
> - */

Based on Andrew Cooper's email, it sounds like this function is just
straight-up broken on Xen PV. Maybe change the comment to "XXX:
broken on Xen PV" or similar.

> +#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
> +#define pv_trap_entry(name) (void *)(xen_pv_domain() ? xen_ ## name : name)
> +#else
> +#define pv_trap_entry(name) (void *)(name)
> +#endif
> +

Seems reasonable to me.

> #ifdef CONFIG_X86_64
>
> static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
> @@ -482,6 +490,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
> 0, 0, __KERNEL_CS); \
> } while (0)
>
> +#define set_intr_gate_pv(n, addr) \
> + do { \
> + set_intr_gate_notrace(n, pv_trap_entry(addr)); \
> + _trace_set_gate(n, GATE_INTERRUPT, \
> + pv_trap_entry(trace_##addr), \
> + 0, 0, __KERNEL_CS); \
> + } while (0)

Any reason this can't be set_intr_gate((n), pv_trap_entry(addr))? Or
does that get preprocessed wrong?

> diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
> index 01fd0a7f48cd..e2ae5f3b9c2c 100644
> --- a/arch/x86/include/asm/traps.h
> +++ b/arch/x86/include/asm/traps.h
> @@ -13,9 +13,8 @@ asmlinkage void divide_error(void);
> asmlinkage void debug(void);
> asmlinkage void nmi(void);
> asmlinkage void int3(void);
> -asmlinkage void xen_debug(void);
> -asmlinkage void xen_int3(void);
> -asmlinkage void xen_stack_segment(void);
> +asmlinkage void xendebug(void);
> +asmlinkage void xenint3(void);

What are xendebug and xenint3 for? Are they because they don't use IST on Xen?

> __visible struct pv_cpu_ops pv_cpu_ops = {
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index bf54309b85da..a79a97a46a59 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -946,15 +946,15 @@ void __init early_trap_init(void)
> * early stage. DEBUG_STACK will be equipped after cpu_init() in
> * trap_init().
> *
> - * We don't need to set trace_idt_table like set_intr_gate(),
> + * We don't need to set trace_idt_table like set_intr_gate_pv(),
> * since we don't have trace_debug and it will be reset to
> * 'debug' in trap_init() by set_intr_gate_ist().
> */
> set_intr_gate_notrace(X86_TRAP_DB, debug);

This is confusing IMO. Maybe just drop the comment change? But how
does this work at all on Xen?

> - set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
> - set_intr_gate(X86_TRAP_TS, invalid_TSS);
> - set_intr_gate(X86_TRAP_NP, segment_not_present);
> - set_intr_gate(X86_TRAP_SS, stack_segment);
> - set_intr_gate(X86_TRAP_GP, general_protection);
> - set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
> - set_intr_gate(X86_TRAP_MF, coprocessor_error);
> - set_intr_gate(X86_TRAP_AC, alignment_check);
> + set_intr_gate_pv(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
> + set_intr_gate_pv(X86_TRAP_TS, invalid_TSS);
> + set_intr_gate_pv(X86_TRAP_NP, segment_not_present);
> + set_intr_gate_pv(X86_TRAP_SS, stack_segment);
> + set_intr_gate_pv(X86_TRAP_GP, general_protection);
> + set_intr_gate_pv(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
> + set_intr_gate_pv(X86_TRAP_MF, coprocessor_error);
> + set_intr_gate_pv(X86_TRAP_AC, alignment_check);

Hmm. I'm okay with this, but I'm wondering whether it might be nice
to try to have a pv op that changes what IDT writes do and remaps the
function pointer. Like...

> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
> index 811e4ddb3f37..7e107142bc4f 100644
> --- a/arch/x86/xen/enlighten_pv.c
> +++ b/arch/x86/xen/enlighten_pv.c
> @@ -598,24 +598,22 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
> * so we should never see them. Warn if
> * there's an unexpected IST-using fault handler.
> */
> - if (addr == (unsigned long)debug)
> - addr = (unsigned long)xen_debug;
> - else if (addr == (unsigned long)int3)
> - addr = (unsigned long)xen_int3;
> - else if (addr == (unsigned long)stack_segment)
> - addr = (unsigned long)xen_stack_segment;
> - else if (addr == (unsigned long)double_fault) {
> + if (addr == (unsigned long)xen_debug)
> + addr = (unsigned long)xen_xendebug;
> + else if (addr == (unsigned long)xen_int3)
> + addr = (unsigned long)xen_xenint3;
> + else if (addr == (unsigned long)xen_double_fault) {
> /* Don't need to handle these */
> return 0;

...this? Can't this list just be extended to handle all of the pv
entries instead of using the macro magic?

> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
> index c3df43141e70..f72ff71cc897 100644
> --- a/arch/x86/xen/xen-asm_64.S
> +++ b/arch/x86/xen/xen-asm_64.S
> @@ -22,11 +22,46 @@
>
> #include "xen-asm.h"
>
> -ENTRY(xen_adjust_exception_frame)
> - mov 8+0(%rsp), %rcx
> - mov 8+8(%rsp), %r11
> - ret $16
> -ENDPROC(xen_adjust_exception_frame)
> +.macro xen_pv_trap name
> +ENTRY(xen_\name)
> + pop %rcx
> + pop %r11
> + jmp \name
> +END(xen_\name)
> +.endm
> +
> +xen_pv_trap divide_error
> +xen_pv_trap debug
> +xen_pv_trap xendebug
> +xen_pv_trap int3
> +xen_pv_trap xenint3
> +xen_pv_trap nmi
> +xen_pv_trap overflow
> +xen_pv_trap bounds
> +xen_pv_trap invalid_op
> +xen_pv_trap device_not_available
> +xen_pv_trap double_fault
> +xen_pv_trap coprocessor_segment_overrun
> +xen_pv_trap invalid_TSS
> +xen_pv_trap segment_not_present
> +xen_pv_trap stack_segment
> +xen_pv_trap general_protection
> +xen_pv_trap page_fault
> +xen_pv_trap async_page_fault
> +xen_pv_trap spurious_interrupt_bug
> +xen_pv_trap coprocessor_error
> +xen_pv_trap alignment_check
> +#ifdef CONFIG_X86_MCE
> +xen_pv_trap machine_check
> +#endif /* CONFIG_X86_MCE */
> +xen_pv_trap simd_coprocessor_error
> +#ifdef CONFIG_IA32_EMULATION
> +xen_pv_trap entry_INT80_compat
> +#endif
> +#ifdef CONFIG_TRACING
> +xen_pv_trap trace_page_fault
> +#endif
> +xen_pv_trap hypervisor_callback

I like this.

Also, IMO it would be nice to fully finish the job. Remaining steps are:

1. Unsuck the SYSCALL entries on Xen PV.
2. Unsuck the SYENTER entry on Xen PV.
3. Make a xen_nmi that's actually correct (should be trivial)

#1 is here:

https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry_syscall&id=14fee348e3e86c994400d68085217d1232a637d6

Can you test it and, if you like it, either add it to your series or
ack it? If you have extra spare cycles, you could try to do #2 and
#3, too :) For #2, it might actually make sense to rig up the Xen
sysenter entry to redirect to entry_SYSCALL_compat or even an entirely
new entry point -- SYSENTER is really weird.

--Andy

2017-08-01 23:52:56

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v2] xen: get rid of paravirt op adjust_exception_frame

On Tue, Aug 1, 2017 at 4:38 PM, Andrew Cooper <[email protected]> wrote:
> On 01/08/2017 20:45, Andy Lutomirski wrote:
>> Also, IMO it would be nice to fully finish the job. Remaining steps are:
>>
>> 1. Unsuck the SYSCALL entries on Xen PV.
>> 2. Unsuck the SYENTER entry on Xen PV.
>> 3. Make a xen_nmi that's actually correct (should be trivial)
>>
>> #1 is here:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry_syscall&id=14fee348e3e86c994400d68085217d1232a637d6
>
> If the
>
> /* Zero-extending 32-bit regs, do not remove */
> movl %eax, %eax
>
> comments are to be believed, then the entry logic needs reordering
> slightly to:
>
> GLOBAL(entry_*_compat_after_hwframe)
> movl %eax, %eax /* Zero-extending 32-bit regs, do not remove */
> pushq %rax /* pt_regs->orig_ax */

D'oh, right. Juergen, want to make that change?

2017-08-01 23:55:23

by Andrew Cooper

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v2] xen: get rid of paravirt op adjust_exception_frame

On 01/08/2017 20:45, Andy Lutomirski wrote:
> Also, IMO it would be nice to fully finish the job. Remaining steps are:
>
> 1. Unsuck the SYSCALL entries on Xen PV.
> 2. Unsuck the SYENTER entry on Xen PV.
> 3. Make a xen_nmi that's actually correct (should be trivial)
>
> #1 is here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry_syscall&id=14fee348e3e86c994400d68085217d1232a637d6

If the

/* Zero-extending 32-bit regs, do not remove */
movl %eax, %eax

comments are to be believed, then the entry logic needs reordering
slightly to:

GLOBAL(entry_*_compat_after_hwframe)
movl %eax, %eax /* Zero-extending 32-bit regs, do not remove */
pushq %rax /* pt_regs->orig_ax */

~Andrew

(It is unfortunate this can't be simplified to pushq %eax)

2017-08-02 11:08:47

by Jürgen Groß

[permalink] [raw]
Subject: Re: [PATCH v2] xen: get rid of paravirt op adjust_exception_frame

On 01/08/17 21:45, Andy Lutomirski wrote:
> On Tue, Aug 1, 2017 at 3:39 AM, Juergen Gross <[email protected]> wrote:
>> When running as Xen pv-guest the exception frame on the stack contains
>> %r11 and %rcx additional to the other data pushed by the processor.
>>
>> Instead of having a paravirt op being called for each exception type
>> prepend the Xen specific code to each exception entry. When running as
>> Xen pv-guest just use the exception entry with prepended instructions,
>> otherwise use the entry without the Xen specific code.
>>
>> Signed-off-by: Juergen Gross <[email protected]>
>> ---
>> arch/x86/entry/entry_64.S | 22 ++------------
>> arch/x86/entry/entry_64_compat.S | 1 -
>> arch/x86/include/asm/desc.h | 16 ++++++++++
>> arch/x86/include/asm/paravirt.h | 5 ---
>> arch/x86/include/asm/paravirt_types.h | 4 ---
>> arch/x86/include/asm/proto.h | 3 ++
>> arch/x86/include/asm/traps.h | 51 +++++++++++++++++++++++++++++--
>> arch/x86/kernel/asm-offsets_64.c | 1 -
>> arch/x86/kernel/paravirt.c | 3 --
>> arch/x86/kernel/traps.c | 57 ++++++++++++++++++-----------------
>> arch/x86/xen/enlighten_pv.c | 16 +++++-----
>> arch/x86/xen/irq.c | 3 --
>> arch/x86/xen/xen-asm_64.S | 45 ++++++++++++++++++++++++---
>> arch/x86/xen/xen-ops.h | 1 -
>> 14 files changed, 147 insertions(+), 81 deletions(-)
>>
>> diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
>> index a9a8027a6c0e..602bcf68a32c 100644
>> --- a/arch/x86/entry/entry_64.S
>> +++ b/arch/x86/entry/entry_64.S
>> @@ -745,7 +745,6 @@ ENTRY(\sym)
>> .endif
>>
>> ASM_CLAC
>> - PARAVIRT_ADJUST_EXCEPTION_FRAME
>>
>> .ifeq \has_error_code
>> pushq $-1 /* ORIG_RAX: no syscall to restart */
>> @@ -901,7 +900,7 @@ ENTRY(do_softirq_own_stack)
>> END(do_softirq_own_stack)
>>
>> #ifdef CONFIG_XEN
>> -idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
>> +idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
>>
>> /*
>> * A note on the "critical region" in our callback handler.
>> @@ -967,8 +966,6 @@ ENTRY(xen_failsafe_callback)
>> movq 8(%rsp), %r11
>> addq $0x30, %rsp
>> pushq $0 /* RIP */
>> - pushq %r11
>> - pushq %rcx
>> jmp general_protection
>> 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
>> movq (%rsp), %rcx
>> @@ -997,9 +994,8 @@ idtentry int3 do_int3 has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
>> idtentry stack_segment do_stack_segment has_error_code=1
>>
>> #ifdef CONFIG_XEN
>> -idtentry xen_debug do_debug has_error_code=0
>> -idtentry xen_int3 do_int3 has_error_code=0
>> -idtentry xen_stack_segment do_stack_segment has_error_code=1
>> +idtentry xendebug do_debug has_error_code=0
>> +idtentry xenint3 do_int3 has_error_code=0
>> #endif
>>
>> idtentry general_protection do_general_protection has_error_code=1
>> @@ -1161,18 +1157,6 @@ END(error_exit)
>> /* Runs on exception stack */
>> ENTRY(nmi)
>> /*
>> - * Fix up the exception frame if we're on Xen.
>> - * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
>> - * one value to the stack on native, so it may clobber the rdx
>> - * scratch slot, but it won't clobber any of the important
>> - * slots past it.
>> - *
>> - * Xen is a different story, because the Xen frame itself overlaps
>> - * the "NMI executing" variable.
>> - */
>
> Based on Andrew Cooper's email, it sounds like this function is just
> straight-up broken on Xen PV. Maybe change the comment to "XXX:
> broken on Xen PV" or similar.

Fine with me.

>
>> +#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
>> +#define pv_trap_entry(name) (void *)(xen_pv_domain() ? xen_ ## name : name)
>> +#else
>> +#define pv_trap_entry(name) (void *)(name)
>> +#endif
>> +
>
> Seems reasonable to me.
>
>> #ifdef CONFIG_X86_64
>>
>> static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
>> @@ -482,6 +490,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
>> 0, 0, __KERNEL_CS); \
>> } while (0)
>>
>> +#define set_intr_gate_pv(n, addr) \
>> + do { \
>> + set_intr_gate_notrace(n, pv_trap_entry(addr)); \
>> + _trace_set_gate(n, GATE_INTERRUPT, \
>> + pv_trap_entry(trace_##addr), \
>> + 0, 0, __KERNEL_CS); \
>> + } while (0)
>
> Any reason this can't be set_intr_gate((n), pv_trap_entry(addr))? Or
> does that get preprocessed wrong?

trace_##addr won't look like anything the compiler will accept with addr
being "(void *)(xen_pv_domain() ? xen_foo : foo)". :-)

>> diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
>> index 01fd0a7f48cd..e2ae5f3b9c2c 100644
>> --- a/arch/x86/include/asm/traps.h
>> +++ b/arch/x86/include/asm/traps.h
>> @@ -13,9 +13,8 @@ asmlinkage void divide_error(void);
>> asmlinkage void debug(void);
>> asmlinkage void nmi(void);
>> asmlinkage void int3(void);
>> -asmlinkage void xen_debug(void);
>> -asmlinkage void xen_int3(void);
>> -asmlinkage void xen_stack_segment(void);
>> +asmlinkage void xendebug(void);
>> +asmlinkage void xenint3(void);
>
> What are xendebug and xenint3 for? Are they because they don't use IST on Xen?

I believe so.

>> __visible struct pv_cpu_ops pv_cpu_ops = {
>> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
>> index bf54309b85da..a79a97a46a59 100644
>> --- a/arch/x86/kernel/traps.c
>> +++ b/arch/x86/kernel/traps.c
>> @@ -946,15 +946,15 @@ void __init early_trap_init(void)
>> * early stage. DEBUG_STACK will be equipped after cpu_init() in
>> * trap_init().
>> *
>> - * We don't need to set trace_idt_table like set_intr_gate(),
>> + * We don't need to set trace_idt_table like set_intr_gate_pv(),
>> * since we don't have trace_debug and it will be reset to
>> * 'debug' in trap_init() by set_intr_gate_ist().
>> */
>> set_intr_gate_notrace(X86_TRAP_DB, debug);
>
> This is confusing IMO. Maybe just drop the comment change? But how
> does this work at all on Xen?

TBH: I don't whether it works on Xen. The changes I did here were just
mechanical ones.

>> - set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
>> - set_intr_gate(X86_TRAP_TS, invalid_TSS);
>> - set_intr_gate(X86_TRAP_NP, segment_not_present);
>> - set_intr_gate(X86_TRAP_SS, stack_segment);
>> - set_intr_gate(X86_TRAP_GP, general_protection);
>> - set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
>> - set_intr_gate(X86_TRAP_MF, coprocessor_error);
>> - set_intr_gate(X86_TRAP_AC, alignment_check);
>> + set_intr_gate_pv(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
>> + set_intr_gate_pv(X86_TRAP_TS, invalid_TSS);
>> + set_intr_gate_pv(X86_TRAP_NP, segment_not_present);
>> + set_intr_gate_pv(X86_TRAP_SS, stack_segment);
>> + set_intr_gate_pv(X86_TRAP_GP, general_protection);
>> + set_intr_gate_pv(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
>> + set_intr_gate_pv(X86_TRAP_MF, coprocessor_error);
>> + set_intr_gate_pv(X86_TRAP_AC, alignment_check);
>
> Hmm. I'm okay with this, but I'm wondering whether it might be nice
> to try to have a pv op that changes what IDT writes do and remaps the
> function pointer. Like...
>
>> diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
>> index 811e4ddb3f37..7e107142bc4f 100644
>> --- a/arch/x86/xen/enlighten_pv.c
>> +++ b/arch/x86/xen/enlighten_pv.c
>> @@ -598,24 +598,22 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
>> * so we should never see them. Warn if
>> * there's an unexpected IST-using fault handler.
>> */
>> - if (addr == (unsigned long)debug)
>> - addr = (unsigned long)xen_debug;
>> - else if (addr == (unsigned long)int3)
>> - addr = (unsigned long)xen_int3;
>> - else if (addr == (unsigned long)stack_segment)
>> - addr = (unsigned long)xen_stack_segment;
>> - else if (addr == (unsigned long)double_fault) {
>> + if (addr == (unsigned long)xen_debug)
>> + addr = (unsigned long)xen_xendebug;
>> + else if (addr == (unsigned long)xen_int3)
>> + addr = (unsigned long)xen_xenint3;
>> + else if (addr == (unsigned long)xen_double_fault) {
>> /* Don't need to handle these */
>> return 0;
>
> ...this? Can't this list just be extended to handle all of the pv
> entries instead of using the macro magic?

I'll have a try.

>
>> diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
>> index c3df43141e70..f72ff71cc897 100644
>> --- a/arch/x86/xen/xen-asm_64.S
>> +++ b/arch/x86/xen/xen-asm_64.S
>> @@ -22,11 +22,46 @@
>>
>> #include "xen-asm.h"
>>
>> -ENTRY(xen_adjust_exception_frame)
>> - mov 8+0(%rsp), %rcx
>> - mov 8+8(%rsp), %r11
>> - ret $16
>> -ENDPROC(xen_adjust_exception_frame)
>> +.macro xen_pv_trap name
>> +ENTRY(xen_\name)
>> + pop %rcx
>> + pop %r11
>> + jmp \name
>> +END(xen_\name)
>> +.endm
>> +
>> +xen_pv_trap divide_error
>> +xen_pv_trap debug
>> +xen_pv_trap xendebug
>> +xen_pv_trap int3
>> +xen_pv_trap xenint3
>> +xen_pv_trap nmi
>> +xen_pv_trap overflow
>> +xen_pv_trap bounds
>> +xen_pv_trap invalid_op
>> +xen_pv_trap device_not_available
>> +xen_pv_trap double_fault
>> +xen_pv_trap coprocessor_segment_overrun
>> +xen_pv_trap invalid_TSS
>> +xen_pv_trap segment_not_present
>> +xen_pv_trap stack_segment
>> +xen_pv_trap general_protection
>> +xen_pv_trap page_fault
>> +xen_pv_trap async_page_fault
>> +xen_pv_trap spurious_interrupt_bug
>> +xen_pv_trap coprocessor_error
>> +xen_pv_trap alignment_check
>> +#ifdef CONFIG_X86_MCE
>> +xen_pv_trap machine_check
>> +#endif /* CONFIG_X86_MCE */
>> +xen_pv_trap simd_coprocessor_error
>> +#ifdef CONFIG_IA32_EMULATION
>> +xen_pv_trap entry_INT80_compat
>> +#endif
>> +#ifdef CONFIG_TRACING
>> +xen_pv_trap trace_page_fault
>> +#endif
>> +xen_pv_trap hypervisor_callback
>
> I like this.
>
> Also, IMO it would be nice to fully finish the job. Remaining steps are:
>
> 1. Unsuck the SYSCALL entries on Xen PV.
> 2. Unsuck the SYENTER entry on Xen PV.
> 3. Make a xen_nmi that's actually correct (should be trivial)
>
> #1 is here:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry_syscall&id=14fee348e3e86c994400d68085217d1232a637d6
>
> Can you test it and, if you like it, either add it to your series or
> ack it? If you have extra spare cycles, you could try to do #2 and
> #3, too :) For #2, it might actually make sense to rig up the Xen
> sysenter entry to redirect to entry_SYSCALL_compat or even an entirely
> new entry point -- SYSENTER is really weird.

I'll look into it.


Juergen

2017-08-03 14:13:18

by Jürgen Groß

[permalink] [raw]
Subject: Re: [Xen-devel] [PATCH v2] xen: get rid of paravirt op adjust_exception_frame

On 02/08/17 01:52, Andy Lutomirski wrote:
> On Tue, Aug 1, 2017 at 4:38 PM, Andrew Cooper <[email protected]> wrote:
>> On 01/08/2017 20:45, Andy Lutomirski wrote:
>>> Also, IMO it would be nice to fully finish the job. Remaining steps are:
>>>
>>> 1. Unsuck the SYSCALL entries on Xen PV.
>>> 2. Unsuck the SYENTER entry on Xen PV.
>>> 3. Make a xen_nmi that's actually correct (should be trivial)
>>>
>>> #1 is here:
>>>
>>> https://git.kernel.org/pub/scm/linux/kernel/git/luto/linux.git/commit/?h=x86/entry_syscall&id=14fee348e3e86c994400d68085217d1232a637d6
>>
>> If the
>>
>> /* Zero-extending 32-bit regs, do not remove */
>> movl %eax, %eax
>>
>> comments are to be believed, then the entry logic needs reordering
>> slightly to:
>>
>> GLOBAL(entry_*_compat_after_hwframe)
>> movl %eax, %eax /* Zero-extending 32-bit regs, do not remove */
>> pushq %rax /* pt_regs->orig_ax */
>
> D'oh, right. Juergen, want to make that change?

Hmm, I'd prefer you sending an update which I could test and ack then.


Juergen