Return-Path: Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1752840AbbGKGRm (ORCPT ); Sat, 11 Jul 2015 02:17:42 -0400 Received: from mail-yk0-f170.google.com ([209.85.160.170]:36489 "EHLO mail-yk0-f170.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1752391AbbGKGRk (ORCPT ); Sat, 11 Jul 2015 02:17:40 -0400 From: Brian Gerst To: x86@kernel.org, linux-kernel@vger.kernel.org Cc: Ingo Molnar , "H. Peter Anvin" , Denys Vlasenko , Andy Lutomirski , Linus Torvalds Subject: [PATCH 5/5] x86/vm86: Use the normal pt_regs area for vm86 Date: Sat, 11 Jul 2015 01:09:20 -0400 Message-Id: <1436591360-16210-6-git-send-email-brgerst@gmail.com> X-Mailer: git-send-email 2.4.3 In-Reply-To: <1436591360-16210-1-git-send-email-brgerst@gmail.com> References: <1436591360-16210-1-git-send-email-brgerst@gmail.com> Sender: linux-kernel-owner@vger.kernel.org List-ID: X-Mailing-List: linux-kernel@vger.kernel.org Content-Length: 13230 Lines: 427 Change to use the normal pt_regs area to enter and exit vm86 mode. This is done by increasing the padding at the top of the stack to make room for the extra vm86 segment slots in the IRET frame. It then saves the 32-bit regs in the off-stack vm86 data, and copies in the vm86 regs. Exiting back to 32-bit mode does the reverse. This allows removing the hacks to jump directly into the exit asm code due to having to change the stack pointer. Returning normally from the vm86 syscall and the exception handlers allows things like ptrace and auditing to work properly. 
Signed-off-by: Brian Gerst --- arch/x86/entry/entry_32.S | 24 +------ arch/x86/include/asm/thread_info.h | 11 ++-- arch/x86/include/asm/vm86.h | 6 +- arch/x86/kernel/signal.c | 3 + arch/x86/kernel/vm86_32.c | 129 ++++++++++++++++--------------------- 5 files changed, 69 insertions(+), 104 deletions(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 21dc60a..f940e24 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -525,34 +525,12 @@ work_resched: work_notifysig: # deal with pending signals and # notify-resume requests -#ifdef CONFIG_VM86 - testl $X86_EFLAGS_VM, PT_EFLAGS(%esp) - movl %esp, %eax - jnz work_notifysig_v86 # returning to kernel-space or - # vm86-space -1: -#else - movl %esp, %eax -#endif TRACE_IRQS_ON ENABLE_INTERRUPTS(CLBR_NONE) - movb PT_CS(%esp), %bl - andb $SEGMENT_RPL_MASK, %bl - cmpb $USER_RPL, %bl - jb resume_kernel + movl %esp, %eax xorl %edx, %edx call do_notify_resume jmp resume_userspace - -#ifdef CONFIG_VM86 - ALIGN -work_notifysig_v86: - pushl %ecx # save ti_flags for do_notify_resume - call save_v86_state # %eax contains pt_regs pointer - popl %ecx - movl %eax, %esp - jmp 1b -#endif END(work_pending) # perform syscall exit tracing diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 225ee54..fdad5c2 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -27,14 +27,17 @@ * Without this offset, that can result in a page fault. (We are * careful that, in this case, the value we read doesn't matter.) * - * In vm86 mode, the hardware frame is much longer still, but we neither - * access the extra members from NMI context, nor do we write such a - * frame at sp0 at all. + * In vm86 mode, the hardware frame is much longer still, so add 16 + * bytes to make room for the real-mode segments. * * x86_64 has a fixed-length stack frame. 
*/ #ifdef CONFIG_X86_32 -# define TOP_OF_KERNEL_STACK_PADDING 8 +# ifdef CONFIG_VM86 +# define TOP_OF_KERNEL_STACK_PADDING 16 +# else +# define TOP_OF_KERNEL_STACK_PADDING 8 +# endif #else # define TOP_OF_KERNEL_STACK_PADDING 0 #endif diff --git a/arch/x86/include/asm/vm86.h b/arch/x86/include/asm/vm86.h index 84d4bda..a72a1a2 100644 --- a/arch/x86/include/asm/vm86.h +++ b/arch/x86/include/asm/vm86.h @@ -30,7 +30,7 @@ struct kernel_vm86_regs { struct kernel_vm86_info { struct vm86_struct __user *vm86_info; - struct pt_regs *regs32; + struct pt_regs regs32; unsigned long v86flags; unsigned long v86mask; unsigned long saved_sp0; @@ -50,7 +50,7 @@ struct kernel_vm86_info { void handle_vm86_fault(struct kernel_vm86_regs *, long); int handle_vm86_trap(struct kernel_vm86_regs *, long, int); -struct pt_regs *save_v86_state(struct kernel_vm86_regs *); +void save_v86_state(struct kernel_vm86_regs *, int); struct task_struct; void release_vm86_irqs(struct task_struct *); @@ -65,6 +65,8 @@ static inline int handle_vm86_trap(struct kernel_vm86_regs *a, long b, int c) return 0; } +static inline void save_v86_state(struct kernel_vm86_regs *, int) { } + #endif /* CONFIG_VM86 */ #endif /* _ASM_X86_VM86_H */ diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 7e88cc7..bfd736e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -635,6 +635,9 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) bool stepping, failed; struct fpu *fpu = &current->thread.fpu; + if (v8086_mode(regs)) + save_v86_state((struct kernel_vm86_regs *) regs, VM86_SIGNAL); + /* Are we from a system call? */ if (syscall_get_nr(current, regs) >= 0) { /* If so, check system call restarting..
*/ diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index d7aae93..bde9d2e 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -50,6 +50,7 @@ #include #include #include +#include /* * Known problems: @@ -120,10 +121,9 @@ static int copy_vm86_regs_from_user(struct kernel_vm86_regs *regs, return ret; } -struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) +void save_v86_state(struct kernel_vm86_regs *regs, int retval) { struct tss_struct *tss; - struct pt_regs *ret; unsigned long tmp; struct kernel_vm86_info *vm86 = current->thread.vm86; @@ -153,12 +153,12 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs) vm86->saved_sp0 = 0; put_cpu(); - ret = vm86->regs32; + memcpy(&regs->pt, &vm86->regs32, sizeof(struct pt_regs)); - ret->fs = vm86->saved_fs; - set_user_gs(ret, vm86->saved_gs); + regs->pt.fs = vm86->saved_fs; + set_user_gs(&regs->pt, vm86->saved_gs); - return ret; + regs->pt.ax = retval; } static void mark_screen_rdonly(struct mm_struct *mm) @@ -232,8 +232,8 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus) struct tss_struct *tss; struct task_struct *tsk = current; struct kernel_vm86_info *vm86 = tsk->thread.vm86; - struct kernel_vm86_regs regs; - struct pt_regs *regs32 = current_pt_regs(); + struct kernel_vm86_regs vm86regs; + struct pt_regs *regs = current_pt_regs(); if (!vm86) { @@ -243,7 +243,7 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus) } if (vm86->saved_sp0) return -EPERM; - if (copy_vm86_regs_from_user(&regs, &v86->regs)) + if (copy_vm86_regs_from_user(&vm86regs, &v86->regs)) return -EFAULT; if (plus) { if (copy_from_user(&vm86->flags, &v86->flags, @@ -258,17 +258,17 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus) return -EFAULT; memset(&vm86->vm86plus, 0, sizeof(struct vm86plus_info_struct)); } - vm86->regs32 = regs32; + memcpy(&vm86->regs32, regs, sizeof(struct pt_regs)); vm86->vm86_info = (struct vm86_struct __user *) v86;
/* * make sure the vm86() system call doesn't try to do anything silly */ - regs.pt.ds = 0; - regs.pt.es = 0; - regs.pt.fs = 0; + vm86regs.pt.ds = 0; + vm86regs.pt.es = 0; + vm86regs.pt.fs = 0; #ifndef CONFIG_X86_32_LAZY_GS - regs.pt.gs = 0; + vm86regs.pt.gs = 0; #endif /* @@ -276,10 +276,10 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus) * has set it up safely, so this makes sure interrupt etc flags are * inherited from protected mode. */ - VEFLAGS = regs.pt.flags; - regs.pt.flags &= SAFE_MASK; - regs.pt.flags |= regs32->flags & ~SAFE_MASK; - regs.pt.flags |= X86_VM_MASK; + VEFLAGS = vm86regs.pt.flags; + vm86regs.pt.flags &= SAFE_MASK; + vm86regs.pt.flags |= vm86->regs32.flags & ~SAFE_MASK; + vm86regs.pt.flags |= X86_VM_MASK; switch (vm86->cpu_type) { case CPU_286: @@ -297,16 +297,15 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus) } /* - * Save old state, set default return value (%ax) to 0 (VM86_SIGNAL) + * Save old state */ - regs32->ax = VM86_SIGNAL; vm86->saved_sp0 = tsk->thread.sp0; - vm86->saved_fs = regs32->fs; - vm86->saved_gs = get_user_gs(regs32); + vm86->saved_fs = vm86->regs32.fs; + vm86->saved_gs = get_user_gs(&vm86->regs32); tss = &per_cpu(cpu_tss, get_cpu()); - /* Set new sp0 right below 32-bit regs */ - tsk->thread.sp0 = (unsigned long) regs32; + /* make room for real-mode segments */ + tsk->thread.sp0 += 16; if (cpu_has_sep) tsk->thread.sysenter_cs = 0; load_sp0(tss, &tsk->thread); @@ -315,41 +314,14 @@ static long do_sys_vm86(struct vm86plus_struct __user *v86, bool plus) if (vm86->flags & VM86_SCREEN_BITMAP) mark_screen_rdonly(tsk->mm); - /*call __audit_syscall_exit since we do not exit via the normal paths */ -#ifdef CONFIG_AUDITSYSCALL - if (unlikely(current->audit_context)) - __audit_syscall_exit(1, 0); -#endif - - __asm__ __volatile__( - "movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" -#ifdef CONFIG_X86_32_LAZY_GS - "mov %2, %%gs\n\t" -#endif - "jmp resume_userspace" - : /* no outputs */ - :"r" 
(&regs), "r" (task_thread_info(tsk)), "r" (0)); - return 0; /* we never return here */ -} - -static inline void return_to_32bit(struct kernel_vm86_regs *regs16, int retval) -{ - struct pt_regs *regs32; - - regs32 = save_v86_state(regs16); - regs32->ax = retval; - __asm__ __volatile__("movl %0,%%esp\n\t" - "movl %1,%%ebp\n\t" - "jmp resume_userspace" - : : "r" (regs32), "r" (current_thread_info())); + memcpy((struct kernel_vm86_regs *)regs, &vm86regs, sizeof(vm86regs)); + force_iret(); + return regs->ax; } static inline void set_IF(struct kernel_vm86_regs *regs) { VEFLAGS |= X86_EFLAGS_VIF; - if (VEFLAGS & X86_EFLAGS_VIP) - return_to_32bit(regs, VM86_STI); } static inline void clear_IF(struct kernel_vm86_regs *regs) @@ -529,7 +501,7 @@ static void do_int(struct kernel_vm86_regs *regs, int i, return; cannot_handle: - return_to_32bit(regs, VM86_INTx + (i << 8)); + save_v86_state(regs, VM86_INTx + (i << 8)); } int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) @@ -538,11 +510,7 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno) if (vm86->vm86plus.is_vm86pus) { if ((trapno == 3) || (trapno == 1)) { - vm86->regs32->ax = VM86_TRAP + (trapno << 8); - /* setting this flag forces the code in entry_32.S to - the path where we call save_v86_state() and change - the stack pointer to regs32 */ - set_thread_flag(TIF_NOTIFY_RESUME); + save_v86_state(regs, VM86_TRAP + (trapno << 8)); return 0; } do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs)); @@ -568,12 +536,6 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) #define CHECK_IF_IN_TRAP \ if (vmpi->vm86dbg_active && vmpi->vm86dbg_TFpendig) \ newflags |= X86_EFLAGS_TF -#define VM86_FAULT_RETURN do { \ - if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) \ - return_to_32bit(regs, VM86_PICRETURN); \ - if (orig_flags & X86_EFLAGS_TF) \ - handle_vm86_trap(regs, 0, 1); \ - return; } while (0)
orig_flags = *(unsigned short *)&regs->pt.flags; @@ -612,7 +574,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) SP(regs) -= 2; } IP(regs) = ip; - VM86_FAULT_RETURN; + goto vm86_fault_return; /* popf */ case 0x9d: @@ -632,7 +594,7 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) else set_vflags_short(newflags, regs); - VM86_FAULT_RETURN; + goto check_vip; } /* int xx */ @@ -640,8 +602,10 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) int intno = popb(csp, ip, simulate_sigsegv); IP(regs) = ip; if (vmpi->vm86dbg_active) { - if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) - return_to_32bit(regs, VM86_INTx + (intno << 8)); + if ((1 << (intno & 7)) & vmpi->vm86dbg_intxxtab[intno >> 3]) { + save_v86_state(regs, VM86_INTx + (intno << 8)); + return; + } } do_int(regs, intno, ssp, sp); return; @@ -672,14 +636,14 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) } else { set_vflags_short(newflags, regs); } - VM86_FAULT_RETURN; + goto check_vip; } /* cli */ case 0xfa: IP(regs) = ip; clear_IF(regs); - VM86_FAULT_RETURN; + goto vm86_fault_return; /* sti */ /* @@ -691,14 +655,29 @@ void handle_vm86_fault(struct kernel_vm86_regs *regs, long error_code) case 0xfb: IP(regs) = ip; set_IF(regs); - VM86_FAULT_RETURN; + goto check_vip; default: - return_to_32bit(regs, VM86_UNKNOWN); + save_v86_state(regs, VM86_UNKNOWN); } return; +check_vip: + if (VEFLAGS & X86_EFLAGS_VIP) { + save_v86_state(regs, VM86_STI); + return; + } + +vm86_fault_return: + if (vmpi->force_return_for_pic && (VEFLAGS & (X86_EFLAGS_IF | X86_EFLAGS_VIF))) { + save_v86_state(regs, VM86_PICRETURN); + return; + } + if (orig_flags & X86_EFLAGS_TF) + handle_vm86_trap(regs, 0, X86_TRAP_DB); + return; + simulate_sigsegv: /* FIXME: After a long discussion with Stas we finally * agreed, that this is wrong.
Here we should @@ -710,7 +689,7 @@ simulate_sigsegv: * should be a mixture of the two, but how do we * get the information? [KD] */ - return_to_32bit(regs, VM86_UNKNOWN); + save_v86_state(regs, VM86_UNKNOWN); } /* ---------------- vm86 special IRQ passing stuff ----------------- */ -- 2.4.3 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/