2015-07-10 02:17:50

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths

This is a review version of the 32-bit asm-to-C migration. I think
it works, but it's not yet well enough tested. I'm a lot more
familiar with the 64-bit asm than the 32-bit asm.

The vm86 stuff especially needs much more careful testing. Brian,
since you're playing with vm86 now, can you take a look?

Changes from v1:
- Fix some nasty vm86 issues. v1 was a regression. v2 is an improvement
over the status quo AFAICT.
- Add patch 1, which is probably worthwhile on its own.
- Get rid of the temporary ud2 hack.

Andy Lutomirski (6):
x86/selftests, x86/vm86: Improve entry_from_vm86 selftest
x86/entry/32: Remove 32-bit syscall audit optimizations
x86/entry/32: Fix an incorrect comment for work_notifysig_v86
x86/entry/32: Remove unnecessary asm check for returns to kernel mode
x86/entry/32: Migrate to C exit path and rework vm86 exit hack
x86/entry: Remove do_notify_resume, syscall_trace_leave, and their TIF
masks

arch/x86/entry/common.c | 107 ++++++++++-----------
arch/x86/entry/entry_32.S | 130 +++----------------------
arch/x86/include/asm/ptrace.h | 1 -
arch/x86/include/asm/signal.h | 1 -
arch/x86/include/asm/thread_info.h | 18 +---
arch/x86/kernel/vm86_32.c | 6 +-
tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++--
7 files changed, 191 insertions(+), 204 deletions(-)

--
2.4.3


2015-07-10 02:18:23

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest

The entry_from_vm86 selftest was very weak. Improve it: test more
types of kernel entries from vm86 mode and test them more carefully.

While we're at it, try to improve behavior on non-SEP CPUs. The
old code was buggy because I misunderstood the intended semantics
of #UD in vm86, so I didn't handle a possible signal.

Signed-off-by: Andy Lutomirski <[email protected]>
---
tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++--
1 file changed, 124 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index 5c38a187677b..f004b2a09916 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -28,6 +28,55 @@
static unsigned long load_addr = 0x10000;
static int nerrs = 0;

+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+ (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+ printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+ nerrs++;
+ }
+
+ const char *signame;
+ if (sig == SIGSEGV)
+ signame = "SIGSEGV";
+ else if (sig == SIGILL)
+ signame = "SIGILL";
+ else
+ signame = "unexpected signal";
+
+ printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+ (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+ got_signal = 1;
+}
+
asm (
".pushsection .rodata\n\t"
".type vmcode_bound, @object\n\t"
@@ -38,6 +87,14 @@ asm (
"int3\n\t"
"vmcode_sysenter:\n\t"
"sysenter\n\t"
+ "vmcode_syscall:\n\t"
+ "syscall\n\t"
+ "vmcode_sti:\n\t"
+ "sti\n\t"
+ "vmcode_int3:\n\t"
+ "int3\n\t"
+ "vmcode_int80:\n\t"
+ "int $0x80\n\t"
".size vmcode, . - vmcode\n\t"
"end_vmcode:\n\t"
".code32\n\t"
@@ -45,9 +102,11 @@ asm (
);

extern unsigned char vmcode[], end_vmcode[];
-extern unsigned char vmcode_bound[], vmcode_sysenter[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+ vmcode_sti[], vmcode_int3[], vmcode_int80[];

static void do_test(struct vm86plus_struct *v86, unsigned long eip,
+ unsigned int rettype, unsigned int retarg,
const char *text)
{
long ret;
@@ -73,13 +132,28 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip,
else
sprintf(trapname, "%d", trapno);

- printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
+ printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
} else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
- printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
+ printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+ } else if (VM86_TYPE(ret) == VM86_TRAP) {
+ printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+ VM86_ARG(ret));
+ } else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+ printf("[INFO]\tExited vm86 mode due to a signal\n");
+ } else if (VM86_TYPE(ret) == VM86_STI) {
+ printf("[INFO]\tExited vm86 mode due to STI\n");
} else {
- printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
+ printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
VM86_TYPE(ret), VM86_ARG(ret));
}
+
+ if (rettype == -1 ||
+ (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+ printf("[OK]\tReturned correctly\n");
+ } else {
+ printf("[FAIL]\tIncorrect return reason\n");
+ nerrs++;
+ }
}

int main(void)
@@ -105,10 +179,52 @@ int main(void)
assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */

/* #BR -- should deliver SIG??? */
- do_test(&v86, vmcode_bound - vmcode, "#BR");
-
- /* SYSENTER -- should cause #GP or #UD depending on CPU */
- do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
+ do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+ /*
+ * SYSENTER -- should cause #GP or #UD depending on CPU.
+ * Expected return type -1 means that we shouldn't validate
+ * the vm86 return value. This will avoid problems on non-SEP
+ * CPUs.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+ clearhandler(SIGILL);
+
+ /*
+ * SYSCALL would be a disaster in VM86 mode. Fortunately,
+ * there is no kernel that both enables SYSCALL and sets
+ * EFER.SCE, so it's #UD on all systems. But vm86 is
+ * buggy (or has a "feature"), so the SIGILL will actually
+ * be delivered.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+ clearhandler(SIGILL);
+
+ /* STI with VIP set */
+ v86.regs.eflags |= X86_EFLAGS_VIP;
+ v86.regs.eflags &= ~X86_EFLAGS_IF;
+ do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+ /* INT3 -- should cause #BP */
+ do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+ /* INT80 -- should exit with "INTx 0x80" */
+ v86.regs.eax = (unsigned int)-1;
+ do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+ /* Execute a null pointer */
+ v86.regs.cs = 0;
+ v86.regs.ss = 0;
+ sethandler(SIGSEGV, sighandler, 0);
+ got_signal = 0;
+ do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer");
+ if (!got_signal) {
+ printf("[FAIL]\tDid not receive SIGSEGV\n");
+ nerrs++;
+ }
+ clearhandler(SIGSEGV);

return (nerrs == 0 ? 0 : 1);
}
--
2.4.3

2015-07-10 02:18:09

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC/PATCH v2 v2 2/6] x86/entry/32: Remove 32-bit syscall audit optimizations

The asm audit optimizations are ugly and obfuscate the code too
much. Remove them.

This will regress performance if syscall auditing is enabled on
32-bit kernels and sysenter is in use. If this becomes a problem,
interested parties are encouraged to implement the equivalent of the
64-bit opportunistic sysret optimization.

Alternatively, a case could be made that, on 32-bit kernels, a less
messy asm audit optimization could be done. 32-bit kernels don't have
the complicated partial register saving tricks that 64-bit kernels
have, so the sysenter post-syscall path could just call the audit
hooks directly. Any reimplementation of this ought to demonstrate
that it only calls the audit hook once per syscall, though, which does
not currently appear to be true. Someone would have to make the case
that doing so would be better than implementing opportunistic sysexit,
though.

Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/entry/entry_32.S | 48 ++---------------------------------------------
1 file changed, 2 insertions(+), 46 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 21dc60a60b5f..90f9e7f6c15e 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -45,16 +45,6 @@
#include <asm/asm.h>
#include <asm/smap.h>

-/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
-#include <linux/elf-em.h>
-#define AUDIT_ARCH_I386 (EM_386|__AUDIT_ARCH_LE)
-#define __AUDIT_ARCH_LE 0x40000000
-
-#ifndef CONFIG_AUDITSYSCALL
-# define sysenter_audit syscall_trace_entry
-# define sysexit_audit syscall_exit_work
-#endif
-
.section .entry.text, "ax"

/*
@@ -339,7 +329,7 @@ sysenter_past_esp:
GET_THREAD_INFO(%ebp)

testl $_TIF_WORK_SYSCALL_ENTRY, TI_flags(%ebp)
- jnz sysenter_audit
+ jnz syscall_trace_entry
sysenter_do_call:
cmpl $(NR_syscalls), %eax
jae sysenter_badsys
@@ -351,7 +341,7 @@ sysenter_after_call:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx
- jnz sysexit_audit
+ jnz syscall_exit_work
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
movl PT_EIP(%esp), %edx
@@ -362,40 +352,6 @@ sysenter_exit:
PTGS_TO_GS
ENABLE_INTERRUPTS_SYSEXIT

-#ifdef CONFIG_AUDITSYSCALL
-sysenter_audit:
- testl $(_TIF_WORK_SYSCALL_ENTRY & ~_TIF_SYSCALL_AUDIT), TI_flags(%ebp)
- jnz syscall_trace_entry
- /* movl PT_EAX(%esp), %eax already set, syscall number: 1st arg to audit */
- movl PT_EBX(%esp), %edx /* ebx/a0: 2nd arg to audit */
- /* movl PT_ECX(%esp), %ecx already set, a1: 3nd arg to audit */
- pushl PT_ESI(%esp) /* a3: 5th arg */
- pushl PT_EDX+4(%esp) /* a2: 4th arg */
- call __audit_syscall_entry
- popl %ecx /* get that remapped edx off the stack */
- popl %ecx /* get that remapped esi off the stack */
- movl PT_EAX(%esp), %eax /* reload syscall number */
- jmp sysenter_do_call
-
-sysexit_audit:
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
- jnz syscall_exit_work
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY)
- movl %eax, %edx /* second arg, syscall return value */
- cmpl $-MAX_ERRNO, %eax /* is it an error ? */
- setbe %al /* 1 if so, 0 if not */
- movzbl %al, %eax /* zero-extend that */
- call __audit_syscall_exit
- DISABLE_INTERRUPTS(CLBR_ANY)
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- testl $(_TIF_ALLWORK_MASK & ~_TIF_SYSCALL_AUDIT), %ecx
- jnz syscall_exit_work
- movl PT_EAX(%esp), %eax /* reload syscall return value */
- jmp sysenter_exit
-#endif
-
.pushsection .fixup, "ax"
2: movl $0, PT_FS(%esp)
jmp 1b
--
2.4.3

2015-07-10 02:18:03

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC/PATCH v2 v2 3/6] x86/entry/32: Fix an incorrect comment for work_notifysig_v86

This code path is for returns to user mode only.

Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/entry/entry_32.S | 3 +--
1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 90f9e7f6c15e..a36d5df6a749 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -484,8 +484,7 @@ work_notifysig: # deal with pending signals and
#ifdef CONFIG_VM86
testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
movl %esp, %eax
- jnz work_notifysig_v86 # returning to kernel-space or
- # vm86-space
+ jnz work_notifysig_v86 # special case for v86
1:
#else
movl %esp, %eax
--
2.4.3

2015-07-10 02:18:31

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC/PATCH v2 v2 4/6] x86/entry/32: Remove unnecessary asm check for returns to kernel mode

Unless I missed something, 44fbbb3dc687c ("x86: get rid of calling
do_notify_resume() when returning to kernel mode") was unnecessarily
paranoid. It split the exit path into resume_userspace and
resume_kernel, after which it was no longer possible to get to
work_notifysig when returning to kernel mode. The check for kernel
mode in work_notifysig is superfluous. Remove it.

Cc: Al Viro <[email protected]>
Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/entry/entry_32.S | 4 ----
1 file changed, 4 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a36d5df6a749..66ff9c4055d7 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -491,10 +491,6 @@ work_notifysig: # deal with pending signals and
#endif
TRACE_IRQS_ON
ENABLE_INTERRUPTS(CLBR_NONE)
- movb PT_CS(%esp), %bl
- andb $SEGMENT_RPL_MASK, %bl
- cmpb $USER_RPL, %bl
- jb resume_kernel
xorl %edx, %edx
call do_notify_resume
jmp resume_userspace
--
2.4.3

2015-07-10 02:18:39

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC/PATCH v2 v2 5/6] x86/entry/32: Migrate to C exit path and rework vm86 exit hack

This removes the hybrid asm-and-C implementation of exit work.

This patch modifies a giant hack. vm86 used to fiddle with
TIF_NOTIFY_RESUME and fix itself up in the exit asm. The hack was
messy and completely incorrect: it broke vm86 if the syscall slow
path was being used.

Rework the hack. We now forcibly exit vm86 mode on return to
userspace if we're delivering a signal (this is needed to deliver
the signal correctly) or if a new TIF_EXIT_VM86 flag is set. The
TIF_NOTIFY_RESUME hack is changed to use TIF_EXIT_VM86 instead.

This makes prepare_exit_to_usermode a bit slower on CONFIG_VM86=y
kernels. People shouldn't use such kernels if they care about
sanity, security, or performance.

Brian Gerst is planning to further rework vm86 mode to leave pt_regs
where it belongs. That will allow us to revert the
pt_regs_to_thread_info slowdown and the stack switching parts of this
code; instead we can just exit normally, as vm86 won't have a
special stack layout any more.

Before this change, the entry_from_vm86 test failed under strace.
Now it passes.

Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/entry/common.c | 56 ++++++++++++++++++++++++++-
arch/x86/entry/entry_32.S | 79 ++++++--------------------------------
arch/x86/include/asm/thread_info.h | 2 +
arch/x86/kernel/vm86_32.c | 6 +--
4 files changed, 69 insertions(+), 74 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index febc53086a69..aeaf7d64be0f 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -240,10 +240,51 @@ void syscall_trace_leave(struct pt_regs *regs)

static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
{
+#ifdef CONFIG_VM86
+ /*
+ * In VM86 mode, pt_regs isn't in a well-defined place on the
+ * stack. Skip the optimization entirely.
+ */
+ return current_thread_info();
+#else
unsigned long top_of_stack =
(unsigned long)(regs + 1) + TOP_OF_KERNEL_STACK_PADDING;
return (struct thread_info *)(top_of_stack - THREAD_SIZE);
+#endif
+}
+
+#ifdef CONFIG_VM86
+static void __noreturn exit_vm86_immediately(struct pt_regs *regs)
+{
+ /*
+ * VM86 sometimes needs to exit back to normal user mode
+ * (unsurprisingly) and its hack of resetting the stack and
+ * jumping into the exit asm isn't always usable (also
+ * unsurprisingly). Instead, we land in this abomination.
+ *
+ * While I can't defend this code as being anything other
+ * than awful, at least it's more or less self-contained,
+ * and it's less awful and much less buggy than the even
+ * worse hack it replaces. --Andy
+ */
+ struct pt_regs *regs32;
+
+ clear_tsk_thread_flag(current, TIF_EXIT_VM86);
+ regs32 = save_v86_state((struct kernel_vm86_regs *)regs);
+ local_irq_disable();
+ __asm__ __volatile__(
+ "movl %0,%%esp\n\t"
+ "movl %1,%%ebp\n\t"
+ "jmp resume_userspace"
+ : : "r" (regs32), "r" (current_thread_info()));
+
+ /*
+ * We don't get here. Instead we restart
+ * prepare_exit_to_usermode via resume_userspace.
+ */
+ unreachable();
}
+#endif

/* Called with IRQs disabled. */
__visible void prepare_exit_to_usermode(struct pt_regs *regs)
@@ -264,12 +305,18 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
READ_ONCE(pt_regs_to_thread_info(regs)->flags);

if (!(cached_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME |
- _TIF_UPROBE | _TIF_NEED_RESCHED)))
+ _TIF_UPROBE | _TIF_NEED_RESCHED |
+ _TIF_EXIT_VM86)))
break;

/* We have work to do. */
local_irq_enable();

+#ifdef CONFIG_VM86
+ if (cached_flags & _TIF_EXIT_VM86)
+ exit_vm86_immediately(regs);
+#endif
+
if (cached_flags & _TIF_NEED_RESCHED)
schedule();

@@ -277,8 +324,13 @@ __visible void prepare_exit_to_usermode(struct pt_regs *regs)
uprobe_notify_resume(regs);

/* deal with pending signal delivery */
- if (cached_flags & _TIF_SIGPENDING)
+ if (cached_flags & _TIF_SIGPENDING) {
+#ifdef CONFIG_VM86
+ if (v8086_mode(regs))
+ exit_vm86_immediately(regs);
+#endif
do_signal(regs);
+ }

if (cached_flags & _TIF_NOTIFY_RESUME) {
clear_thread_flag(TIF_NOTIFY_RESUME);
diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 66ff9c4055d7..b2909bf8cf70 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -256,14 +256,10 @@ ret_from_intr:

ENTRY(resume_userspace)
LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
+ DISABLE_INTERRUPTS(CLBR_ANY)
TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- andl $_TIF_WORK_MASK, %ecx # is there any work to be done on
- # int/exception return?
- jne work_pending
+ movl %esp, %eax
+ call prepare_exit_to_usermode
jmp restore_all
END(ret_from_exception)

@@ -341,7 +337,7 @@ sysenter_after_call:
TRACE_IRQS_OFF
movl TI_flags(%ebp), %ecx
testl $_TIF_ALLWORK_MASK, %ecx
- jnz syscall_exit_work
+ jnz syscall_exit_work_irqs_off
sysenter_exit:
/* if something modifies registers it must also disable sysexit */
movl PT_EIP(%esp), %edx
@@ -377,13 +373,7 @@ syscall_after_call:
movl %eax, PT_EAX(%esp) # store the return value
syscall_exit:
LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- testl $_TIF_ALLWORK_MASK, %ecx # current->work
- jnz syscall_exit_work
+ jmp syscall_exit_work

restore_all:
TRACE_IRQS_IRET
@@ -460,52 +450,6 @@ ldt_ss:
#endif
ENDPROC(entry_INT80_32)

- # perform work that needs to be done immediately before resumption
- ALIGN
-work_pending:
- testb $_TIF_NEED_RESCHED, %cl
- jz work_notifysig
-work_resched:
- call schedule
- LOCKDEP_SYS_EXIT
- DISABLE_INTERRUPTS(CLBR_ANY) # make sure we don't miss an interrupt
- # setting need_resched or sigpending
- # between sampling and the iret
- TRACE_IRQS_OFF
- movl TI_flags(%ebp), %ecx
- andl $_TIF_WORK_MASK, %ecx # is there any work to be done other
- # than syscall tracing?
- jz restore_all
- testb $_TIF_NEED_RESCHED, %cl
- jnz work_resched
-
-work_notifysig: # deal with pending signals and
- # notify-resume requests
-#ifdef CONFIG_VM86
- testl $X86_EFLAGS_VM, PT_EFLAGS(%esp)
- movl %esp, %eax
- jnz work_notifysig_v86 # special case for v86
-1:
-#else
- movl %esp, %eax
-#endif
- TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_NONE)
- xorl %edx, %edx
- call do_notify_resume
- jmp resume_userspace
-
-#ifdef CONFIG_VM86
- ALIGN
-work_notifysig_v86:
- pushl %ecx # save ti_flags for do_notify_resume
- call save_v86_state # %eax contains pt_regs pointer
- popl %ecx
- movl %eax, %esp
- jmp 1b
-#endif
-END(work_pending)
-
# perform syscall exit tracing
ALIGN
syscall_trace_entry:
@@ -520,15 +464,14 @@ END(syscall_trace_entry)

# perform syscall exit tracing
ALIGN
-syscall_exit_work:
- testl $_TIF_WORK_SYSCALL_EXIT, %ecx
- jz work_pending
+syscall_exit_work_irqs_off:
TRACE_IRQS_ON
- ENABLE_INTERRUPTS(CLBR_ANY) # could let syscall_trace_leave() call
- # schedule() instead
+ ENABLE_INTERRUPTS(CLBR_ANY)
+
+syscall_exit_work:
movl %esp, %eax
- call syscall_trace_leave
- jmp resume_userspace
+ call syscall_return_slowpath
+ jmp restore_all
END(syscall_exit_work)

syscall_fault:
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 225ee545e1a0..5a60392ce70e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -95,6 +95,7 @@ struct thread_info {
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
+#define TIF_EXIT_VM86 9 /* deferred vm86 exit */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
@@ -119,6 +120,7 @@ struct thread_info {
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
+#define _TIF_EXIT_VM86 (1 << TIF_EXIT_VM86)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
#define _TIF_NOTSC (1 << TIF_NOTSC)
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index fc9db6ef2a95..46dcef7046b6 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -549,11 +549,9 @@ int handle_vm86_trap(struct kernel_vm86_regs *regs, long error_code, int trapno)
{
if (VMPI.is_vm86pus) {
if ((trapno == 3) || (trapno == 1)) {
+ /* Queue up a return to normal userspace. */
KVM86->regs32->ax = VM86_TRAP + (trapno << 8);
- /* setting this flag forces the code in entry_32.S to
- the path where we call save_v86_state() and change
- the stack pointer to KVM86->regs32 */
- set_thread_flag(TIF_NOTIFY_RESUME);
+ set_thread_flag(TIF_EXIT_VM86);
return 0;
}
do_int(regs, trapno, (unsigned char __user *) (regs->pt.ss << 4), SP(regs));
--
2.4.3

2015-07-10 02:19:28

by Andy Lutomirski

[permalink] [raw]
Subject: [RFC/PATCH v2 v2 6/6] x86/entry: Remove do_notify_resume, syscall_trace_leave, and their TIF masks

They are no longer used. Good riddance!

Deleting the TIF_ macros is really nice. It was never clear why
there were so many variants.

Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/entry/common.c | 57 --------------------------------------
arch/x86/include/asm/ptrace.h | 1 -
arch/x86/include/asm/signal.h | 1 -
arch/x86/include/asm/thread_info.h | 16 -----------
4 files changed, 75 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index aeaf7d64be0f..994ae1ed6e9d 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -207,37 +207,6 @@ long syscall_trace_enter(struct pt_regs *regs)
return syscall_trace_enter_phase2(regs, arch, phase1_result);
}

-/* Deprecated. */
-void syscall_trace_leave(struct pt_regs *regs)
-{
- bool step;
-
- /*
- * We may come here right after calling schedule_user()
- * or do_notify_resume(), in which case we can be in RCU
- * user mode.
- */
- user_exit();
-
- audit_syscall_exit(regs);
-
- if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
- trace_sys_exit(regs, regs->ax);
-
- /*
- * If TIF_SYSCALL_EMU is set, we only get here because of
- * TIF_SINGLESTEP (i.e. this is PTRACE_SYSEMU_SINGLESTEP).
- * We already reported this syscall instruction in
- * syscall_trace_enter().
- */
- step = unlikely(test_thread_flag(TIF_SINGLESTEP)) &&
- !test_thread_flag(TIF_SYSCALL_EMU);
- if (step || test_thread_flag(TIF_SYSCALL_TRACE))
- tracehook_report_syscall_exit(regs, step);
-
- user_enter();
-}
-
static struct thread_info *pt_regs_to_thread_info(struct pt_regs *regs)
{
#ifdef CONFIG_VM86
@@ -398,29 +367,3 @@ __visible void syscall_return_slowpath(struct pt_regs *regs)
local_irq_disable();
prepare_exit_to_usermode(regs);
}
-
-/*
- * Deprecated notification of userspace execution resumption
- * - triggered by the TIF_WORK_MASK flags
- */
-__visible void
-do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
-{
- user_exit();
-
- if (thread_info_flags & _TIF_UPROBE)
- uprobe_notify_resume(regs);
-
- /* deal with pending signal delivery */
- if (thread_info_flags & _TIF_SIGPENDING)
- do_signal(regs);
-
- if (thread_info_flags & _TIF_NOTIFY_RESUME) {
- clear_thread_flag(TIF_NOTIFY_RESUME);
- tracehook_notify_resume(regs);
- }
- if (thread_info_flags & _TIF_USER_RETURN_NOTIFY)
- fire_user_return_notifiers();
-
- user_enter();
-}
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 5fabf1362942..6271281f947d 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -88,7 +88,6 @@ extern long syscall_trace_enter_phase2(struct pt_regs *, u32 arch,
unsigned long phase1_result);

extern long syscall_trace_enter(struct pt_regs *);
-extern void syscall_trace_leave(struct pt_regs *);

static inline unsigned long regs_return_value(struct pt_regs *regs)
{
diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h
index b42408bcf6b5..c481be78fcf1 100644
--- a/arch/x86/include/asm/signal.h
+++ b/arch/x86/include/asm/signal.h
@@ -31,7 +31,6 @@ typedef sigset_t compat_sigset_t;
#include <uapi/asm/signal.h>
#ifndef __ASSEMBLY__
extern void do_signal(struct pt_regs *regs);
-extern void do_notify_resume(struct pt_regs *, void *, __u32);

#define __ARCH_HAS_SA_RESTORER

diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 5a60392ce70e..7bd82e365df6 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -142,27 +142,11 @@ struct thread_info {
_TIF_SECCOMP | _TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)

-/* work to do in syscall_trace_leave() */
-#define _TIF_WORK_SYSCALL_EXIT \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SINGLESTEP | \
- _TIF_SYSCALL_TRACEPOINT | _TIF_NOHZ)
-
-/* work to do on interrupt/exception return */
-#define _TIF_WORK_MASK \
- (0x0000FFFF & \
- ~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT| \
- _TIF_SINGLESTEP|_TIF_SECCOMP|_TIF_SYSCALL_EMU))
-
/* work to do on any return to user space */
#define _TIF_ALLWORK_MASK \
((0x0000FFFF & ~_TIF_SECCOMP) | _TIF_SYSCALL_TRACEPOINT | \
_TIF_NOHZ)

-/* Only used for 64 bit */
-#define _TIF_DO_NOTIFY_MASK \
- (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | \
- _TIF_USER_RETURN_NOTIFY | _TIF_UPROBE)
-
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
(_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
--
2.4.3

2015-07-10 14:45:59

by Brian Gerst

[permalink] [raw]
Subject: Re: [RFC/PATCH v2 v2 0/6] x86_32: Migrate to new entry/exit paths

On Thu, Jul 9, 2015 at 10:17 PM, Andy Lutomirski <[email protected]> wrote:
> This is a review version of the 32-bit asm-to-C migration. I think
> it works, but it's not yet well enough tested. I'm a lot more
> familiar with the 64-bit asm than the 32-bit asm.
>
> The vm86 stuff especially needs much more careful testing. Brian,
> since you're playing with vm86 now, can you take a look?

The vm86 parts of these patches won't be needed after my patches. I
should have them ready to post this weekend. I just need to do some
more testing.

--
Brian Gerst

2015-07-13 19:33:42

by Andy Lutomirski

[permalink] [raw]
Subject: Re: [RFC/PATCH v2 v2 1/6] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest

Ingo, would it make sense for you to apply just this patch? It should
be helpful for testing whatever we end up doing with the vm86 code.

Thanks,
Andy

On Thu, Jul 9, 2015 at 7:17 PM, Andy Lutomirski <[email protected]> wrote:
> The entry_from_vm86 selftest was very weak. Improve it: test more
> types of kernel entries from vm86 mode and test them more carefully.
>
> While we're at it, try to improve behavior on non-SEP CPUs. The
> old code was buggy because I misunderstood the intended semantics
> of #UD in vm86, so I didn't handle a possible signal.
>
> Signed-off-by: Andy Lutomirski <[email protected]>
> ---
> tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++--
> 1 file changed, 124 insertions(+), 8 deletions(-)
>
> diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
> index 5c38a187677b..f004b2a09916 100644
> --- a/tools/testing/selftests/x86/entry_from_vm86.c
> +++ b/tools/testing/selftests/x86/entry_from_vm86.c
> @@ -28,6 +28,55 @@
> static unsigned long load_addr = 0x10000;
> static int nerrs = 0;
>
> +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
> + int flags)
> +{
> + struct sigaction sa;
> + memset(&sa, 0, sizeof(sa));
> + sa.sa_sigaction = handler;
> + sa.sa_flags = SA_SIGINFO | flags;
> + sigemptyset(&sa.sa_mask);
> + if (sigaction(sig, &sa, 0))
> + err(1, "sigaction");
> +}
> +
> +static void clearhandler(int sig)
> +{
> + struct sigaction sa;
> + memset(&sa, 0, sizeof(sa));
> + sa.sa_handler = SIG_DFL;
> + sigemptyset(&sa.sa_mask);
> + if (sigaction(sig, &sa, 0))
> + err(1, "sigaction");
> +}
> +
> +static sig_atomic_t got_signal;
> +
> +static void sighandler(int sig, siginfo_t *info, void *ctx_void)
> +{
> + ucontext_t *ctx = (ucontext_t*)ctx_void;
> +
> + if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
> + (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
> + printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
> + nerrs++;
> + }
> +
> + const char *signame;
> + if (sig == SIGSEGV)
> + signame = "SIGSEGV";
> + else if (sig == SIGILL)
> + signame = "SIGILL";
> + else
> + signame = "unexpected signal";
> +
> + printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
> + (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
> + (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
> +
> + got_signal = 1;
> +}
> +
> asm (
> ".pushsection .rodata\n\t"
> ".type vmcode_bound, @object\n\t"
> @@ -38,6 +87,14 @@ asm (
> "int3\n\t"
> "vmcode_sysenter:\n\t"
> "sysenter\n\t"
> + "vmcode_syscall:\n\t"
> + "syscall\n\t"
> + "vmcode_sti:\n\t"
> + "sti\n\t"
> + "vmcode_int3:\n\t"
> + "int3\n\t"
> + "vmcode_int80:\n\t"
> + "int $0x80\n\t"
> ".size vmcode, . - vmcode\n\t"
> "end_vmcode:\n\t"
> ".code32\n\t"
> @@ -45,9 +102,11 @@ asm (
> );
>
> extern unsigned char vmcode[], end_vmcode[];
> -extern unsigned char vmcode_bound[], vmcode_sysenter[];
> +extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
> + vmcode_sti[], vmcode_int3[], vmcode_int80[];
>
> static void do_test(struct vm86plus_struct *v86, unsigned long eip,
> + unsigned int rettype, unsigned int retarg,
> const char *text)
> {
> long ret;
> @@ -73,13 +132,28 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip,
> else
> sprintf(trapname, "%d", trapno);
>
> - printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
> + printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
> } else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
> - printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
> + printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
> + } else if (VM86_TYPE(ret) == VM86_TRAP) {
> + printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
> + VM86_ARG(ret));
> + } else if (VM86_TYPE(ret) == VM86_SIGNAL) {
> + printf("[INFO]\tExited vm86 mode due to a signal\n");
> + } else if (VM86_TYPE(ret) == VM86_STI) {
> + printf("[INFO]\tExited vm86 mode due to STI\n");
> } else {
> - printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
> + printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
> VM86_TYPE(ret), VM86_ARG(ret));
> }
> +
> + if (rettype == -1 ||
> + (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
> + printf("[OK]\tReturned correctly\n");
> + } else {
> + printf("[FAIL]\tIncorrect return reason\n");
> + nerrs++;
> + }
> }
>
> int main(void)
> @@ -105,10 +179,52 @@ int main(void)
> assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */
>
> /* #BR -- should deliver SIG??? */
> - do_test(&v86, vmcode_bound - vmcode, "#BR");
> -
> - /* SYSENTER -- should cause #GP or #UD depending on CPU */
> - do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
> + do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
> +
> + /*
> + * SYSENTER -- should cause #GP or #UD depending on CPU.
> + * Expected return type -1 means that we shouldn't validate
> + * the vm86 return value. This will avoid problems on non-SEP
> + * CPUs.
> + */
> + sethandler(SIGILL, sighandler, 0);
> + do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
> + clearhandler(SIGILL);
> +
> + /*
> + * SYSCALL would be a disaster in VM86 mode. Fortunately,
> + * there is no kernel that both enables SYSCALL and sets
> + * EFER.SCE, so it's #UD on all systems. But vm86 is
> + * buggy (or has a "feature"), so the SIGILL will actually
> + * be delivered.
> + */
> + sethandler(SIGILL, sighandler, 0);
> + do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
> + clearhandler(SIGILL);
> +
> + /* STI with VIP set */
> + v86.regs.eflags |= X86_EFLAGS_VIP;
> + v86.regs.eflags &= ~X86_EFLAGS_IF;
> + do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
> +
> + /* INT3 -- should cause #BP */
> + do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
> +
> + /* INT80 -- should exit with "INTx 0x80" */
> + v86.regs.eax = (unsigned int)-1;
> + do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
> +
> + /* Execute a null pointer */
> + v86.regs.cs = 0;
> + v86.regs.ss = 0;
> + sethandler(SIGSEGV, sighandler, 0);
> + got_signal = 0;
> + do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer");
> + if (!got_signal) {
> + printf("[FAIL]\tDid not receive SIGSEGV\n");
> + nerrs++;
> + }
> + clearhandler(SIGSEGV);
>
> return (nerrs == 0 ? 0 : 1);
> }
> --
> 2.4.3
>



--
Andy Lutomirski
AMA Capital Management, LLC

Subject: [tip:x86/asm] x86/selftests, x86/vm86: Improve entry_from_vm86 selftest

Commit-ID: f2a50f8b7da45ff2de93a71393e715a2ab9f3b68
Gitweb: http://git.kernel.org/tip/f2a50f8b7da45ff2de93a71393e715a2ab9f3b68
Author: Andy Lutomirski <[email protected]>
AuthorDate: Thu, 9 Jul 2015 19:17:29 -0700
Committer: Ingo Molnar <[email protected]>
CommitDate: Tue, 21 Jul 2015 10:51:20 +0200

x86/selftests, x86/vm86: Improve entry_from_vm86 selftest

The entry_from_vm86 selftest was very weak. Improve it: test
more types of kernel entries from vm86 mode and test them more
carefully.

While we're at it, try to improve behavior on non-SEP CPUs. The
old code was buggy because I misunderstood the intended
semantics of #UD in vm86, so I didn't handle a possible signal.

Signed-off-by: Andy Lutomirski <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: Frederic Weisbecker <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Oleg Nesterov <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Shuah Khan <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/d8ef1d7368ac70d8342481563ed50f9a7d2eea6f.1436492057.git.luto@kernel.org
Signed-off-by: Ingo Molnar <[email protected]>
---
tools/testing/selftests/x86/entry_from_vm86.c | 132 ++++++++++++++++++++++++--
1 file changed, 124 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c
index 5c38a18..f004b2a 100644
--- a/tools/testing/selftests/x86/entry_from_vm86.c
+++ b/tools/testing/selftests/x86/entry_from_vm86.c
@@ -28,6 +28,55 @@
static unsigned long load_addr = 0x10000;
static int nerrs = 0;

+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static sig_atomic_t got_signal;
+
+static void sighandler(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t*)ctx_void;
+
+ if (ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_VM ||
+ (ctx->uc_mcontext.gregs[REG_CS] & 3) != 3) {
+ printf("[FAIL]\tSignal frame should not reflect vm86 mode\n");
+ nerrs++;
+ }
+
+ const char *signame;
+ if (sig == SIGSEGV)
+ signame = "SIGSEGV";
+ else if (sig == SIGILL)
+ signame = "SIGILL";
+ else
+ signame = "unexpected signal";
+
+ printf("[INFO]\t%s: FLAGS = 0x%lx, CS = 0x%hx\n", signame,
+ (unsigned long)ctx->uc_mcontext.gregs[REG_EFL],
+ (unsigned short)ctx->uc_mcontext.gregs[REG_CS]);
+
+ got_signal = 1;
+}
+
asm (
".pushsection .rodata\n\t"
".type vmcode_bound, @object\n\t"
@@ -38,6 +87,14 @@ asm (
"int3\n\t"
"vmcode_sysenter:\n\t"
"sysenter\n\t"
+ "vmcode_syscall:\n\t"
+ "syscall\n\t"
+ "vmcode_sti:\n\t"
+ "sti\n\t"
+ "vmcode_int3:\n\t"
+ "int3\n\t"
+ "vmcode_int80:\n\t"
+ "int $0x80\n\t"
".size vmcode, . - vmcode\n\t"
"end_vmcode:\n\t"
".code32\n\t"
@@ -45,9 +102,11 @@ asm (
);

extern unsigned char vmcode[], end_vmcode[];
-extern unsigned char vmcode_bound[], vmcode_sysenter[];
+extern unsigned char vmcode_bound[], vmcode_sysenter[], vmcode_syscall[],
+ vmcode_sti[], vmcode_int3[], vmcode_int80[];

static void do_test(struct vm86plus_struct *v86, unsigned long eip,
+ unsigned int rettype, unsigned int retarg,
const char *text)
{
long ret;
@@ -73,13 +132,28 @@ static void do_test(struct vm86plus_struct *v86, unsigned long eip,
else
sprintf(trapname, "%d", trapno);

- printf("[OK]\tExited vm86 mode due to #%s\n", trapname);
+ printf("[INFO]\tExited vm86 mode due to #%s\n", trapname);
} else if (VM86_TYPE(ret) == VM86_UNKNOWN) {
- printf("[OK]\tExited vm86 mode due to unhandled GP fault\n");
+ printf("[INFO]\tExited vm86 mode due to unhandled GP fault\n");
+ } else if (VM86_TYPE(ret) == VM86_TRAP) {
+ printf("[INFO]\tExited vm86 mode due to a trap (arg=%ld)\n",
+ VM86_ARG(ret));
+ } else if (VM86_TYPE(ret) == VM86_SIGNAL) {
+ printf("[INFO]\tExited vm86 mode due to a signal\n");
+ } else if (VM86_TYPE(ret) == VM86_STI) {
+ printf("[INFO]\tExited vm86 mode due to STI\n");
} else {
- printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n",
+ printf("[INFO]\tExited vm86 mode due to type %ld, arg %ld\n",
VM86_TYPE(ret), VM86_ARG(ret));
}
+
+ if (rettype == -1 ||
+ (VM86_TYPE(ret) == rettype && VM86_ARG(ret) == retarg)) {
+ printf("[OK]\tReturned correctly\n");
+ } else {
+ printf("[FAIL]\tIncorrect return reason\n");
+ nerrs++;
+ }
}

int main(void)
@@ -105,10 +179,52 @@ int main(void)
assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */

/* #BR -- should deliver SIG??? */
- do_test(&v86, vmcode_bound - vmcode, "#BR");
-
- /* SYSENTER -- should cause #GP or #UD depending on CPU */
- do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER");
+ do_test(&v86, vmcode_bound - vmcode, VM86_INTx, 5, "#BR");
+
+ /*
+ * SYSENTER -- should cause #GP or #UD depending on CPU.
+ * Expected return type -1 means that we shouldn't validate
+ * the vm86 return value. This will avoid problems on non-SEP
+ * CPUs.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_sysenter - vmcode, -1, 0, "SYSENTER");
+ clearhandler(SIGILL);
+
+ /*
+ * SYSCALL would be a disaster in VM86 mode. Fortunately,
+ * there is no kernel that both enables SYSCALL and sets
+ * EFER.SCE, so it's #UD on all systems. But vm86 is
+ * buggy (or has a "feature"), so the SIGILL will actually
+ * be delivered.
+ */
+ sethandler(SIGILL, sighandler, 0);
+ do_test(&v86, vmcode_syscall - vmcode, VM86_SIGNAL, 0, "SYSCALL");
+ clearhandler(SIGILL);
+
+ /* STI with VIP set */
+ v86.regs.eflags |= X86_EFLAGS_VIP;
+ v86.regs.eflags &= ~X86_EFLAGS_IF;
+ do_test(&v86, vmcode_sti - vmcode, VM86_STI, 0, "STI with VIP set");
+
+ /* INT3 -- should cause #BP */
+ do_test(&v86, vmcode_int3 - vmcode, VM86_TRAP, 3, "INT3");
+
+ /* INT80 -- should exit with "INTx 0x80" */
+ v86.regs.eax = (unsigned int)-1;
+ do_test(&v86, vmcode_int80 - vmcode, VM86_INTx, 0x80, "int80");
+
+ /* Execute a null pointer */
+ v86.regs.cs = 0;
+ v86.regs.ss = 0;
+ sethandler(SIGSEGV, sighandler, 0);
+ got_signal = 0;
+ do_test(&v86, 0, VM86_SIGNAL, 0, "Execute null pointer");
+ if (!got_signal) {
+ printf("[FAIL]\tDid not receive SIGSEGV\n");
+ nerrs++;
+ }
+ clearhandler(SIGSEGV);

return (nerrs == 0 ? 0 : 1);
}