From: Juergen Gross <jgross@suse.com>
To: linux-kernel@vger.kernel.org, xen-devel@lists.xenproject.org, x86@kernel.org
Cc: boris.ostrovsky@oracle.com, hpa@zytor.com, tglx@linutronix.de,
    mingo@redhat.com, luto@amacapital.net, Juergen Gross <jgross@suse.com>
Subject: [PATCH v2] xen: get rid of paravirt op adjust_exception_frame
Date: Tue, 1 Aug 2017 12:39:54 +0200
Message-Id: <20170801103954.23904-1-jgross@suse.com>
X-Mailer: git-send-email 2.12.3

When running as a Xen pv-guest the exception frame on the stack contains
%r11 and %rcx in addition to the other data pushed by the processor.

Instead of having a paravirt op called for each exception type, prepend
the Xen-specific code to each exception entry. When running as a Xen
pv-guest just use the exception entry with the prepended instructions,
otherwise use the entry without the Xen-specific code.

Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/entry/entry_64.S             | 22 ++------------
 arch/x86/entry/entry_64_compat.S      |  1 -
 arch/x86/include/asm/desc.h           | 16 ++++++++++
 arch/x86/include/asm/paravirt.h       |  5 ---
 arch/x86/include/asm/paravirt_types.h |  4 ---
 arch/x86/include/asm/proto.h          |  3 ++
 arch/x86/include/asm/traps.h          | 51 +++++++++++++++++++++++++++++--
 arch/x86/kernel/asm-offsets_64.c      |  1 -
 arch/x86/kernel/paravirt.c            |  3 --
 arch/x86/kernel/traps.c               | 57 ++++++++++++++++++-----------------
 arch/x86/xen/enlighten_pv.c           | 16 +++++-----
 arch/x86/xen/irq.c                    |  3 --
 arch/x86/xen/xen-asm_64.S             | 45 ++++++++++++++++++++++++---
 arch/x86/xen/xen-ops.h                |  1 -
 14 files changed, 147 insertions(+), 81 deletions(-)

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index a9a8027a6c0e..602bcf68a32c 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -745,7 +745,6 @@ ENTRY(\sym)
 	.endif
 
 	ASM_CLAC
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
 
 	.ifeq \has_error_code
 	pushq	$-1			/* ORIG_RAX: no syscall to restart */
@@ -901,7 +900,7 @@ ENTRY(do_softirq_own_stack)
 END(do_softirq_own_stack)
 
 #ifdef CONFIG_XEN
-idtentry xen_hypervisor_callback xen_do_hypervisor_callback has_error_code=0
+idtentry hypervisor_callback xen_do_hypervisor_callback has_error_code=0
 
 /*
  * A note on the "critical region" in our callback handler.
@@ -967,8 +966,6 @@ ENTRY(xen_failsafe_callback)
 	movq	8(%rsp), %r11
 	addq	$0x30, %rsp
 	pushq	$0			/* RIP */
-	pushq	%r11
-	pushq	%rcx
 	jmp	general_protection
 1:	/* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */
 	movq	(%rsp), %rcx
@@ -997,9 +994,8 @@ idtentry int3		do_int3			has_error_code=0	paranoid=1 shift_ist=DEBUG_STACK
 idtentry stack_segment		do_stack_segment	has_error_code=1
 
 #ifdef CONFIG_XEN
-idtentry xen_debug		do_debug		has_error_code=0
-idtentry xen_int3		do_int3			has_error_code=0
-idtentry xen_stack_segment	do_stack_segment	has_error_code=1
+idtentry xendebug		do_debug		has_error_code=0
+idtentry xenint3		do_int3			has_error_code=0
 #endif
 
 idtentry general_protection	do_general_protection	has_error_code=1
@@ -1161,18 +1157,6 @@ END(error_exit)
 /* Runs on exception stack */
 ENTRY(nmi)
 	/*
-	 * Fix up the exception frame if we're on Xen.
-	 * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most
-	 * one value to the stack on native, so it may clobber the rdx
-	 * scratch slot, but it won't clobber any of the important
-	 * slots past it.
-	 *
-	 * Xen is a different story, because the Xen frame itself overlaps
-	 * the "NMI executing" variable.
-	 */
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
-
-	/*
 	 * We allow breakpoints in NMIs. If a breakpoint occurs, then
 	 * the iretq it performs will take us out of NMI context.
 	 * This means that we can have nested NMIs where the next
diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S
index e1721dafbcb1..843d2f08397b 100644
--- a/arch/x86/entry/entry_64_compat.S
+++ b/arch/x86/entry/entry_64_compat.S
@@ -294,7 +294,6 @@ ENTRY(entry_INT80_compat)
 	/*
 	 * Interrupts are off on entry.
 	 */
-	PARAVIRT_ADJUST_EXCEPTION_FRAME
 	ASM_CLAC			/* Do this early to minimize exposure */
 	SWAPGS
 
diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h
index d0a21b12dd58..f581e61ffcaa 100644
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -9,6 +9,8 @@
 #include
 #include
 
+#include
+
 static inline void fill_ldt(struct desc_struct *desc, const struct user_desc *info)
 {
 	desc->limit0		= info->limit & 0x0ffff;
@@ -83,6 +85,12 @@ static inline phys_addr_t get_cpu_gdt_paddr(unsigned int cpu)
 	return per_cpu_ptr_to_phys(get_cpu_gdt_rw(cpu));
 }
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+#define pv_trap_entry(name) (void *)(xen_pv_domain() ? xen_ ## name : name)
+#else
+#define pv_trap_entry(name) (void *)(name)
+#endif
+
 #ifdef CONFIG_X86_64
 
 static inline void pack_gate(gate_desc *gate, unsigned type, unsigned long func,
@@ -482,6 +490,14 @@ static inline void _set_gate(int gate, unsigned type, void *addr,
 			0, 0, __KERNEL_CS);			\
 } while (0)
 
+#define set_intr_gate_pv(n, addr)				\
+	do {							\
+		set_intr_gate_notrace(n, pv_trap_entry(addr));	\
+		_trace_set_gate(n, GATE_INTERRUPT,		\
+				pv_trap_entry(trace_##addr),	\
+				0, 0, __KERNEL_CS);		\
+	} while (0)
+
 extern int first_system_vector;
 /* used_vectors is BITMAP for irq is not managed by percpu vector_irq */
 extern unsigned long used_vectors[];
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 9ccac1926587..c25dd22f7c70 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -960,11 +960,6 @@ extern void default_banner(void);
 #define GET_CR2_INTO_RAX				\
 	call PARA_INDIRECT(pv_mmu_ops+PV_MMU_read_cr2)
 
-#define PARAVIRT_ADJUST_EXCEPTION_FRAME					\
-	PARA_SITE(PARA_PATCH(pv_irq_ops, PV_IRQ_adjust_exception_frame), \
-		  CLBR_NONE,						\
-		  call PARA_INDIRECT(pv_irq_ops+PV_IRQ_adjust_exception_frame))
-
 #define USERGS_SYSRET64							\
 	PARA_SITE(PARA_PATCH(pv_cpu_ops, PV_CPU_usergs_sysret64),	\
 		  CLBR_NONE,						\
diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
index 9ffc36bfe4cd..c55106726938 100644
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -195,10 +195,6 @@ struct pv_irq_ops {
 
 	void (*safe_halt)(void);
 	void (*halt)(void);
-
-#ifdef CONFIG_X86_64
-	void (*adjust_exception_frame)(void);
-#endif
 } __no_randomize_layout;
 
 struct pv_mmu_ops {
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 8d3964fc5f91..b408b1886195 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -24,6 +24,9 @@ void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
 void entry_INT80_compat(void);
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+void xen_entry_INT80_compat(void);
+#endif
 #endif
 
 void x86_configure_nx(void);
diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h
index 01fd0a7f48cd..e2ae5f3b9c2c 100644
--- a/arch/x86/include/asm/traps.h
+++ b/arch/x86/include/asm/traps.h
@@ -13,9 +13,8 @@ asmlinkage void divide_error(void);
 asmlinkage void debug(void);
 asmlinkage void nmi(void);
 asmlinkage void int3(void);
-asmlinkage void xen_debug(void);
-asmlinkage void xen_int3(void);
-asmlinkage void xen_stack_segment(void);
+asmlinkage void xendebug(void);
+asmlinkage void xenint3(void);
 asmlinkage void overflow(void);
 asmlinkage void bounds(void);
 asmlinkage void invalid_op(void);
@@ -38,6 +37,34 @@ asmlinkage void machine_check(void);
 #endif /* CONFIG_X86_MCE */
 asmlinkage void simd_coprocessor_error(void);
 
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_divide_error(void);
+asmlinkage void xen_debug(void);
+asmlinkage void xen_xendebug(void);
+asmlinkage void xen_int3(void);
+asmlinkage void xen_xenint3(void);
+asmlinkage void xen_nmi(void);
+asmlinkage void xen_overflow(void);
+asmlinkage void xen_bounds(void);
+asmlinkage void xen_invalid_op(void);
+asmlinkage void xen_device_not_available(void);
+asmlinkage void xen_double_fault(void);
+asmlinkage void xen_coprocessor_segment_overrun(void);
+asmlinkage void xen_invalid_TSS(void);
+asmlinkage void xen_segment_not_present(void);
+asmlinkage void xen_stack_segment(void);
+asmlinkage void xen_general_protection(void);
+asmlinkage void xen_page_fault(void);
+asmlinkage void xen_async_page_fault(void);
+asmlinkage void xen_spurious_interrupt_bug(void);
+asmlinkage void xen_coprocessor_error(void);
+asmlinkage void xen_alignment_check(void);
+#ifdef CONFIG_X86_MCE
+asmlinkage void xen_machine_check(void);
+#endif /* CONFIG_X86_MCE */
+asmlinkage void xen_simd_coprocessor_error(void);
+#endif
+
 #ifdef CONFIG_TRACING
 asmlinkage void trace_page_fault(void);
 #define trace_stack_segment stack_segment
@@ -54,6 +81,24 @@ asmlinkage void trace_page_fault(void);
 #define trace_alignment_check alignment_check
 #define trace_simd_coprocessor_error simd_coprocessor_error
 #define trace_async_page_fault async_page_fault
+
+#if defined(CONFIG_X86_64) && defined(CONFIG_XEN_PV)
+asmlinkage void xen_trace_page_fault(void);
+#define xen_trace_stack_segment xen_stack_segment
+#define xen_trace_divide_error xen_divide_error
+#define xen_trace_bounds xen_bounds
+#define xen_trace_invalid_op xen_invalid_op
+#define xen_trace_device_not_available xen_device_not_available
+#define xen_trace_coprocessor_segment_overrun xen_coprocessor_segment_overrun
+#define xen_trace_invalid_TSS xen_invalid_TSS
+#define xen_trace_segment_not_present xen_segment_not_present
+#define xen_trace_general_protection xen_general_protection
+#define xen_trace_spurious_interrupt_bug xen_spurious_interrupt_bug
+#define xen_trace_coprocessor_error xen_coprocessor_error
+#define xen_trace_alignment_check xen_alignment_check
+#define xen_trace_simd_coprocessor_error xen_simd_coprocessor_error
+#define xen_trace_async_page_fault xen_async_page_fault
+#endif
 #endif
 
 dotraplinkage void do_divide_error(struct pt_regs *, long);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 99332f550c48..cf42206926af 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -20,7 +20,6 @@ static char syscalls_ia32[] = {
 int main(void)
 {
 #ifdef CONFIG_PARAVIRT
-	OFFSET(PV_IRQ_adjust_exception_frame, pv_irq_ops, adjust_exception_frame);
 	OFFSET(PV_CPU_usergs_sysret64, pv_cpu_ops, usergs_sysret64);
 	OFFSET(PV_CPU_swapgs, pv_cpu_ops, swapgs);
 	BLANK();
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index bc0a849589bb..a14df9eecfed 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -319,9 +319,6 @@ __visible struct pv_irq_ops pv_irq_ops = {
 	.irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
 	.safe_halt = native_safe_halt,
 	.halt = native_halt,
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = paravirt_nop,
-#endif
 };
 
 __visible struct pv_cpu_ops pv_cpu_ops = {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index bf54309b85da..a79a97a46a59 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -946,15 +946,15 @@ void __init early_trap_init(void)
 	 * early stage. DEBUG_STACK will be equipped after cpu_init() in
 	 * trap_init().
 	 *
-	 * We don't need to set trace_idt_table like set_intr_gate(),
+	 * We don't need to set trace_idt_table like set_intr_gate_pv(),
 	 * since we don't have trace_debug and it will be reset to
 	 * 'debug' in trap_init() by set_intr_gate_ist().
 	 */
 	set_intr_gate_notrace(X86_TRAP_DB, debug);
 	/* int3 can be called from all */
-	set_system_intr_gate(X86_TRAP_BP, &int3);
+	set_system_intr_gate(X86_TRAP_BP, pv_trap_entry(int3));
 #ifdef CONFIG_X86_32
-	set_intr_gate(X86_TRAP_PF, page_fault);
+	set_intr_gate_pv(X86_TRAP_PF, page_fault);
 #endif
 	load_idt(&idt_descr);
 }
@@ -962,7 +962,7 @@ void __init early_trap_init(void)
 void __init early_trap_pf_init(void)
 {
 #ifdef CONFIG_X86_64
-	set_intr_gate(X86_TRAP_PF, page_fault);
+	set_intr_gate_pv(X86_TRAP_PF, page_fault);
 #endif
 }
 
@@ -978,42 +978,45 @@ void __init trap_init(void)
 	early_iounmap(p, 4);
 #endif
 
-	set_intr_gate(X86_TRAP_DE, divide_error);
-	set_intr_gate_ist(X86_TRAP_NMI, &nmi, NMI_STACK);
+	set_intr_gate_pv(X86_TRAP_DE, divide_error);
+	set_intr_gate_ist(X86_TRAP_NMI, pv_trap_entry(nmi), NMI_STACK);
 	/* int4 can be called from all */
-	set_system_intr_gate(X86_TRAP_OF, &overflow);
-	set_intr_gate(X86_TRAP_BR, bounds);
-	set_intr_gate(X86_TRAP_UD, invalid_op);
-	set_intr_gate(X86_TRAP_NM, device_not_available);
+	set_system_intr_gate(X86_TRAP_OF, pv_trap_entry(overflow));
+	set_intr_gate_pv(X86_TRAP_BR, bounds);
+	set_intr_gate_pv(X86_TRAP_UD, invalid_op);
+	set_intr_gate_pv(X86_TRAP_NM, device_not_available);
 #ifdef CONFIG_X86_32
 	set_task_gate(X86_TRAP_DF, GDT_ENTRY_DOUBLEFAULT_TSS);
 #else
-	set_intr_gate_ist(X86_TRAP_DF, &double_fault, DOUBLEFAULT_STACK);
+	set_intr_gate_ist(X86_TRAP_DF, pv_trap_entry(double_fault),
+			  DOUBLEFAULT_STACK);
 #endif
-	set_intr_gate(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
-	set_intr_gate(X86_TRAP_TS, invalid_TSS);
-	set_intr_gate(X86_TRAP_NP, segment_not_present);
-	set_intr_gate(X86_TRAP_SS, stack_segment);
-	set_intr_gate(X86_TRAP_GP, general_protection);
-	set_intr_gate(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
-	set_intr_gate(X86_TRAP_MF, coprocessor_error);
-	set_intr_gate(X86_TRAP_AC, alignment_check);
+	set_intr_gate_pv(X86_TRAP_OLD_MF, coprocessor_segment_overrun);
+	set_intr_gate_pv(X86_TRAP_TS, invalid_TSS);
+	set_intr_gate_pv(X86_TRAP_NP, segment_not_present);
+	set_intr_gate_pv(X86_TRAP_SS, stack_segment);
+	set_intr_gate_pv(X86_TRAP_GP, general_protection);
+	set_intr_gate_pv(X86_TRAP_SPURIOUS, spurious_interrupt_bug);
+	set_intr_gate_pv(X86_TRAP_MF, coprocessor_error);
+	set_intr_gate_pv(X86_TRAP_AC, alignment_check);
 #ifdef CONFIG_X86_MCE
-	set_intr_gate_ist(X86_TRAP_MC, &machine_check, MCE_STACK);
+	set_intr_gate_ist(X86_TRAP_MC, pv_trap_entry(machine_check), MCE_STACK);
 #endif
-	set_intr_gate(X86_TRAP_XF, simd_coprocessor_error);
+	set_intr_gate_pv(X86_TRAP_XF, simd_coprocessor_error);
 
 	/* Reserve all the builtin and the syscall vector: */
 	for (i = 0; i < FIRST_EXTERNAL_VECTOR; i++)
 		set_bit(i, used_vectors);
 
 #ifdef CONFIG_IA32_EMULATION
-	set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_compat);
+	set_system_intr_gate(IA32_SYSCALL_VECTOR,
+			     pv_trap_entry(entry_INT80_compat));
 	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 
 #ifdef CONFIG_X86_32
-	set_system_intr_gate(IA32_SYSCALL_VECTOR, entry_INT80_32);
+	set_system_intr_gate(IA32_SYSCALL_VECTOR,
+			     pv_trap_entry(entry_INT80_32));
 	set_bit(IA32_SYSCALL_VECTOR, used_vectors);
 #endif
 
@@ -1035,15 +1038,15 @@ void __init trap_init(void)
 	 * in early_trap_init(). However, ITS works only after
 	 * cpu_init() loads TSS. See comments in early_trap_init().
	 */
-	set_intr_gate_ist(X86_TRAP_DB, &debug, DEBUG_STACK);
+	set_intr_gate_ist(X86_TRAP_DB, pv_trap_entry(debug), DEBUG_STACK);
 	/* int3 can be called from all */
-	set_system_intr_gate_ist(X86_TRAP_BP, &int3, DEBUG_STACK);
+	set_system_intr_gate_ist(X86_TRAP_BP, pv_trap_entry(int3), DEBUG_STACK);
 
 	x86_init.irqs.trap_init();
 
 #ifdef CONFIG_X86_64
 	memcpy(&debug_idt_table, &idt_table, IDT_ENTRIES * 16);
-	set_nmi_gate(X86_TRAP_DB, &debug);
-	set_nmi_gate(X86_TRAP_BP, &int3);
+	set_nmi_gate(X86_TRAP_DB, pv_trap_entry(debug));
+	set_nmi_gate(X86_TRAP_BP, pv_trap_entry(int3));
 #endif
 }
diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c
index 811e4ddb3f37..7e107142bc4f 100644
--- a/arch/x86/xen/enlighten_pv.c
+++ b/arch/x86/xen/enlighten_pv.c
@@ -598,24 +598,22 @@ static int cvt_gate_to_trap(int vector, const gate_desc *val,
 	 * so we should never see them. Warn if
 	 * there's an unexpected IST-using fault handler.
 	 */
-	if (addr == (unsigned long)debug)
-		addr = (unsigned long)xen_debug;
-	else if (addr == (unsigned long)int3)
-		addr = (unsigned long)xen_int3;
-	else if (addr == (unsigned long)stack_segment)
-		addr = (unsigned long)xen_stack_segment;
-	else if (addr == (unsigned long)double_fault) {
+	if (addr == (unsigned long)xen_debug)
+		addr = (unsigned long)xen_xendebug;
+	else if (addr == (unsigned long)xen_int3)
+		addr = (unsigned long)xen_xenint3;
+	else if (addr == (unsigned long)xen_double_fault) {
 		/* Don't need to handle these */
 		return 0;
 #ifdef CONFIG_X86_MCE
-	} else if (addr == (unsigned long)machine_check) {
+	} else if (addr == (unsigned long)xen_machine_check) {
 		/*
 		 * when xen hypervisor inject vMCE to guest,
 		 * use native mce handler to handle it
 		 */
 		;
 #endif
-	} else if (addr == (unsigned long)nmi)
+	} else if (addr == (unsigned long)xen_nmi)
 		/*
 		 * Use the native version as well.
 		 */
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index 33e92955e09d..d4eff5676cfa 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -123,9 +123,6 @@ static const struct pv_irq_ops xen_irq_ops __initconst = {
 
 	.safe_halt = xen_safe_halt,
 	.halt = xen_halt,
-
-#ifdef CONFIG_X86_64
-	.adjust_exception_frame = xen_adjust_exception_frame,
-#endif
 };
 
 void __init xen_init_irq_ops(void)
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index c3df43141e70..f72ff71cc897 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -22,11 +22,46 @@
 #include "xen-asm.h"
 
-ENTRY(xen_adjust_exception_frame)
-	mov 8+0(%rsp), %rcx
-	mov 8+8(%rsp), %r11
-	ret $16
-ENDPROC(xen_adjust_exception_frame)
+.macro xen_pv_trap name
+ENTRY(xen_\name)
+	pop %rcx
+	pop %r11
+	jmp \name
+END(xen_\name)
+.endm
+
+xen_pv_trap divide_error
+xen_pv_trap debug
+xen_pv_trap xendebug
+xen_pv_trap int3
+xen_pv_trap xenint3
+xen_pv_trap nmi
+xen_pv_trap overflow
+xen_pv_trap bounds
+xen_pv_trap invalid_op
+xen_pv_trap device_not_available
+xen_pv_trap double_fault
+xen_pv_trap coprocessor_segment_overrun
+xen_pv_trap invalid_TSS
+xen_pv_trap segment_not_present
+xen_pv_trap stack_segment
+xen_pv_trap general_protection
+xen_pv_trap page_fault
+xen_pv_trap async_page_fault
+xen_pv_trap spurious_interrupt_bug
+xen_pv_trap coprocessor_error
+xen_pv_trap alignment_check
+#ifdef CONFIG_X86_MCE
+xen_pv_trap machine_check
+#endif /* CONFIG_X86_MCE */
+xen_pv_trap simd_coprocessor_error
+#ifdef CONFIG_IA32_EMULATION
+xen_pv_trap entry_INT80_compat
+#endif
+#ifdef CONFIG_TRACING
+xen_pv_trap trace_page_fault
+#endif
+xen_pv_trap hypervisor_callback
 
 hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
 
 /*
diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h
index 0d5004477db6..18e8729d42bd 100644
--- a/arch/x86/xen/xen-ops.h
+++ b/arch/x86/xen/xen-ops.h
@@ -145,7 +145,6 @@ DECL_ASM(void, xen_restore_fl_direct, unsigned long);
 __visible void xen_iret(void);
 __visible void xen_sysret32(void);
 __visible void xen_sysret64(void);
-__visible void xen_adjust_exception_frame(void);
 
 extern int xen_panic_handler_init(void);
 
-- 
2.12.3