2011-06-06 17:27:46

by Andrew Lutomirski

[permalink] [raw]
Subject: [PATCH 0/3] x86-64: vsyscall emulation cleanups

[cc list trimmed from the previous patches to include only people who seem
like they would care about this round of changes. My apologies if I missed
anyone.]

This series applies to the x86/vdso branch of the -tip tree.

Patch 1/3 deletes some very outdated comments in vsyscall_64.c.

Patch 2/3 simplifies the vsyscall emulation int 0xcc mechanism and removes
some ret instructions that are (as pointed out by the PaX team) possibly
dangerous.

Patch 3/3 fixes what is arguably the most serious problem of all: people
thought that CONFIG_COMPAT_VSYSCALLS controlled binary compatibility. It
changes the Kconfig short description, help text, and feature removal entry
to make it a lot more clear what's going on.

Andy Lutomirski (3):
x86-64: Fix outdated comments in vsyscall_64.c
x86-64: Clean up vsyscall emulation and remove fixed-address ret
x86-64: Clarify CONFIG_COMPAT_VSYSCALLS text

Documentation/feature-removal-schedule.txt | 11 ++-
arch/x86/Kconfig | 27 +++++----
arch/x86/include/asm/vsyscall.h | 10 +++-
arch/x86/kernel/vsyscall_64.c | 85 +++++++++-------------------
arch/x86/kernel/vsyscall_emu_64.S | 17 +-----
5 files changed, 58 insertions(+), 92 deletions(-)

--
1.7.5.2


2011-06-06 17:31:13

by Andrew Lutomirski

[permalink] [raw]
Subject: [PATCH 1/3] x86-64: Fix outdated comments in vsyscall_64.c

Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/kernel/vsyscall_64.c | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 27d49b7..0b50b3c 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -11,10 +11,11 @@
* vsyscalls. One vsyscall can reserve more than 1 slot to avoid
* jumping out of line if necessary. We cannot add more with this
* mechanism because older kernels won't return -ENOSYS.
- * If we want more than four we need a vDSO.
*
- * Note: the concept clashes with user mode linux. If you use UML and
- * want per guest time just set the kernel.vsyscall64 sysctl to 0.
+ * This mechanism is deprecated in favor of the vDSO.
+ *
+ * Note: the concept clashes with user mode linux. UML users should
+ * use the vDSO.
*/

/* Disable profiling for userspace code: */
--
1.7.5.2

2011-06-06 17:30:40

by Andrew Lutomirski

[permalink] [raw]
Subject: [PATCH 2/3] x86-64: Clean up vsyscall emulation and remove fixed-address ret

Remove the fixed-address ret in the vsyscall emulation stubs. As a
result, int 0xcc no longer works if executed from user address space
(which is OK -- nothing sensible should do that).

Removing support for int 0xcc in user address space cleans up the
code considerably.

Signed-off-by: Andy Lutomirski <[email protected]>
---
arch/x86/include/asm/vsyscall.h | 10 ++++-
arch/x86/kernel/vsyscall_64.c | 78 +++++++++++--------------------------
arch/x86/kernel/vsyscall_emu_64.S | 17 +--------
3 files changed, 32 insertions(+), 73 deletions(-)

diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 293ae08..bb710cb 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -32,9 +32,15 @@ extern struct timezone sys_tz;
extern void map_vsyscall(void);

/* Emulation */
-static inline bool in_vsyscall_page(unsigned long addr)
+
+static inline bool is_vsyscall_entry(unsigned long addr)
+{
+ return (addr & ~0xC00UL) == VSYSCALL_START;
+}
+
+static inline int vsyscall_entry_nr(unsigned long addr)
{
- return (addr & ~(PAGE_SIZE - 1)) == VSYSCALL_START;
+ return (addr & 0xC00UL) >> 10;
}

#endif /* __KERNEL__ */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b50b3c..04540f7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -96,25 +96,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,

tsk = current;

- printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx",
+ printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
level, tsk->comm, task_pid_nr(tsk),
message,
regs->ip - 2, regs->sp, regs->ax, regs->si, regs->di);
- if (!in_vsyscall_page(regs->ip - 2))
- print_vma_addr(" in ", regs->ip - 2);
- printk("\n");
-}
-
-/* al values for each vsyscall; see vsyscall_emu_64.S for why. */
-static u8 vsyscall_nr_to_al[] = {0xcc, 0xce, 0xf0};
-
-static int al_to_vsyscall_nr(u8 al)
-{
- int i;
- for (i = 0; i < ARRAY_SIZE(vsyscall_nr_to_al); i++)
- if (vsyscall_nr_to_al[i] == al)
- return i;
- return -1;
}

#ifdef CONFIG_COMPAT_VSYSCALLS
@@ -141,17 +126,12 @@ vtime(time_t *t)

#endif /* CONFIG_COMPAT_VSYSCALLS */

-/* If CONFIG_COMPAT_VSYSCALLS=y, then this is incorrect for vsyscall_nr == 1. */
-static inline unsigned long vsyscall_intcc_addr(int vsyscall_nr)
-{
- return VSYSCALL_START + 1024*vsyscall_nr + 2;
-}
-
void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
{
static DEFINE_RATELIMIT_STATE(rs, 3600 * HZ, 3);
struct task_struct *tsk;
const char *vsyscall_name;
+ unsigned long caller;
int vsyscall_nr;
long ret;

@@ -160,39 +140,26 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)

local_irq_enable();

- vsyscall_nr = al_to_vsyscall_nr(regs->ax & 0xff);
- if (vsyscall_nr < 0) {
+ /*
+ * x86-ism here: regs->ip points to the instruction after the int 0xcc,
+ * and int 0xcc is two bytes long.
+ */
+ if (!is_vsyscall_entry(regs->ip - 2)) {
warn_bad_vsyscall(KERN_WARNING, regs, "illegal int 0xcc "
"(exploit attempt?)");
goto sigsegv;
}
+ vsyscall_nr = vsyscall_entry_nr(regs->ip - 2);

- if (regs->ip - 2 != vsyscall_intcc_addr(vsyscall_nr)) {
- if (in_vsyscall_page(regs->ip - 2)) {
- /* This should not be possible. */
- warn_bad_vsyscall(KERN_WARNING, regs,
- "int 0xcc bogus magic "
- "(exploit attempt?)");
- goto sigsegv;
- } else {
- /*
- * We allow the call because tools like ThreadSpotter
- * might copy the int 0xcc instruction to user memory.
- * We make it annoying, though, to try to persuade
- * the authors to stop doing that...
- */
- warn_bad_vsyscall(KERN_WARNING, regs,
- "int 0xcc in user code "
- "(exploit attempt? legacy "
- "instrumented code?)");
- }
+ if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
+ warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack "
+ "(exploit attempt?)");
+ goto sigsegv;
}

tsk = current;
- if (seccomp_mode(&tsk->seccomp)) {
+ if (seccomp_mode(&tsk->seccomp))
do_exit(SIGKILL);
- goto out;
- }

switch (vsyscall_nr) {
case 0:
@@ -202,12 +169,8 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
(struct timezone __user *)regs->si);
break;

+#ifndef CONFIG_COMPAT_VSYSCALLS
case 1:
-#ifdef CONFIG_COMPAT_VSYSCALLS
- warn_bad_vsyscall(KERN_WARNING, regs, "bogus time() vsyscall "
- "emulation (exploit attempt?)");
- goto sigsegv;
-#else
vsyscall_name = "time";
ret = sys_time((time_t __user *)regs->di);
break;
@@ -221,6 +184,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
break;

default:
+ /*
+ * If we get here, then vsyscall_nr indicates that int 0xcc
+ * happened at an address in the vsyscall page that doesn't
+ * contain int 0xcc. That can't happen.
+ */
BUG();
}

@@ -240,9 +208,6 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
regs->ax = ret;

if (__ratelimit(&rs)) {
- unsigned long caller;
- if (get_user(caller, (unsigned long __user *)regs->sp))
- caller = 0; /* no need to crash on this fault. */
printk(KERN_INFO "%s[%d] emulated legacy vsyscall %s(); "
"upgrade your code to avoid a performance hit. "
"ip:%lx sp:%lx caller:%lx",
@@ -253,7 +218,10 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
printk("\n");
}

-out:
+ /* Emulate a ret instruction. */
+ regs->ip = caller;
+ regs->sp += 8;
+
local_irq_disable();
return;

diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index 2d53e26..5658d42 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,36 +7,21 @@
#include <linux/linkage.h>
#include <asm/irq_vectors.h>

-/*
- * These magic incantations are chosen so that they fault if entered anywhere
- * other than an instruction boundary. The movb instruction is two bytes, and
- * the int imm8 instruction is also two bytes, so the only misaligned places
- * to enter are the immediate values for the two instructions. 0xcc is int3
- * (always faults), 0xce is into (faults on x64-64, and 32-bit code can't get
- * here), and 0xf0 is lock (lock int is invalid).
- *
- * The unused parts of the page are filled with 0xcc by the linker script.
- */
+/* The unused parts of the page are filled with 0xcc by the linker script. */

.section .vsyscall_0, "a"
ENTRY(vsyscall_0)
- movb $0xcc, %al
int $VSYSCALL_EMU_VECTOR
- ret
END(vsyscall_0)

#ifndef CONFIG_COMPAT_VSYSCALLS
.section .vsyscall_1, "a"
ENTRY(vsyscall_1)
- movb $0xce, %al
int $VSYSCALL_EMU_VECTOR
- ret
END(vsyscall_1)
#endif

.section .vsyscall_2, "a"
ENTRY(vsyscall_2)
- movb $0xf0, %al
int $VSYSCALL_EMU_VECTOR
- ret
END(vsyscall_2)
--
1.7.5.2

2011-06-06 17:28:04

by Andrew Lutomirski

[permalink] [raw]
Subject: [PATCH 3/3] x86-64: Clarify CONFIG_COMPAT_VSYSCALLS text

Enough people are confused about whether CONFIG_COMPAT_VSYSCALLS=n
breaks binary compatibility that we should make it harder to be
confused. So make it more clear what's going on.

The new text is a slight lie in that CONFIG_COMPAT_VSYSCALLS could
make it slightly harder to exploit *kernel* bugs once the kernel
address layout is randomized, but I mentioning that in the help text
might just cause more confusion

Signed-off-by: Andy Lutomirski <[email protected]>
---
Documentation/feature-removal-schedule.txt | 11 +++++++----
arch/x86/Kconfig | 27 +++++++++++++++------------
2 files changed, 22 insertions(+), 16 deletions(-)

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 4282ab2..ef5d1e9 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -602,11 +602,14 @@ Who: Laurent Pinchart <[email protected]>
----------------------------

What: CONFIG_COMPAT_VSYSCALLS (x86_64)
-When: When glibc 2.14 or newer is ubitquitous. Perhaps mid-2012.
+When: When glibc 2.14 or newer is ubiquitous. Perhaps 2013.
Why: Having user-executable syscall invoking code at a fixed addresses makes
- it easier for attackers to exploit security holes.
- Turning off CONFIG_COMPAT_VSYSCALLS mostly removes the risk but will
- make the time() function slower on glibc versions 2.13 and below.
+ it easier for attackers to exploit security holes. Turning off
+ CONFIG_COMPAT_VSYSCALLS reduces the risk without breaking binary
+ compatibility but will make the time() function slower on glibc
+ versions 2.13 and below.
+
+ We may flip the default setting to N before 2013.
Who: Andy Lutomirski <[email protected]>

----------------------------
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 30041d8..17d99bc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1648,24 +1648,27 @@ config COMPAT_VDSO

config COMPAT_VSYSCALLS
def_bool y
- prompt "Fixed address legacy vsyscalls"
+ prompt "Fast legacy sys_time() vsyscall"
depends on X86_64
---help---
- Legacy user code expects to be able to issue three syscalls
- by calling a fixed addresses. If you say N, then the kernel
- traps and emulates these calls. If you say Y, then there is
- actual executable code at a fixed address to implement time()
- efficiently.
+ Glibc 2.13 and older, statically linked binaries, and a few
+ other things use a legacy ABI to implement time().

- On a system with recent enough glibc (probably 2.14 or
- newer) and no static binaries, you can say N without a
- performance penalty to improve security: having no fixed
- address userspace-executable syscall invoking code makes
- it harder for both remote and local attackers to exploit
- security holes.
+ If you say N here, the kernel will emulate that interface in
+ order to make certain types of userspace bugs more difficult
+ to exploit. This will cause some legacy software to run
+ slightly more slowly.
+
+ If you say Y here, then the kernel will provide native code to
+ allow legacy programs to run without any performance impact.
+ This could make it easier to exploit certain types of
+ userspace bugs.

If unsure, say Y.

+ NOTE: disabling this option will not break any ABI; the kernel
+ will be fully compatible with all binaries either way.
+
config CMDLINE_BOOL
bool "Built-in kernel command line"
---help---
--
1.7.5.2

2011-06-06 17:41:41

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86-64: Clean up vsyscall emulation and remove fixed-address ret


* Andy Lutomirski <[email protected]> wrote:

> Remove the fixed-address ret in the vsyscall emulation stubs. As a
> result, int 0xcc no longer works if executed from user address space
> (which is OK -- nothing sensible should do that).
>
> Removing support for int 0xcc in user address space cleans up the
> code considerably.
>
> Signed-off-by: Andy Lutomirski <[email protected]>
> ---
> arch/x86/include/asm/vsyscall.h | 10 ++++-
> arch/x86/kernel/vsyscall_64.c | 78 +++++++++++--------------------------
> arch/x86/kernel/vsyscall_emu_64.S | 17 +--------
> 3 files changed, 32 insertions(+), 73 deletions(-)

Ok, this makes the whole series a lot more palatable!

Can i propagate the rename suggested by hpa into patch #3:

CONFIG_COMPAT_VSYSCALL => CONFIG_LEGACY_VSYSCALL

? 'legacy' is probably the best name for this and it thus won't be
confused with the 32-bit compat facilities we have.

Thanks,

Ingo

2011-06-06 17:46:16

by Andrew Lutomirski

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86-64: Clean up vsyscall emulation and remove fixed-address ret

On Mon, Jun 6, 2011 at 1:41 PM, Ingo Molnar <[email protected]> wrote:
>
> * Andy Lutomirski <[email protected]> wrote:
>
>> Remove the fixed-address ret in the vsyscall emulation stubs. ?As a
>> result, int 0xcc no longer works if executed from user address space
>> (which is OK -- nothing sensible should do that).
>>
>> Removing support for int 0xcc in user address space cleans up the
>> code considerably.
>>
>> Signed-off-by: Andy Lutomirski <[email protected]>
>> ---
>> ?arch/x86/include/asm/vsyscall.h ? | ? 10 ++++-
>> ?arch/x86/kernel/vsyscall_64.c ? ? | ? 78 +++++++++++--------------------------
>> ?arch/x86/kernel/vsyscall_emu_64.S | ? 17 +--------
>> ?3 files changed, 32 insertions(+), 73 deletions(-)
>
> Ok, this makes the whole series a lot more palatable!
>
> Can i propagate the rename suggested by hpa into patch #3:
>
> ?CONFIG_COMPAT_VSYSCALL => CONFIG_LEGACY_VSYSCALL

Certainly! CONFIG_LEGACY_VTIME might be even better, but I'm fine
with any renaming you'd like to do.

--Andy

>
> ? 'legacy' is probably the best name for this and it thus won't be
> confused with the 32-bit compat facilities we have.
>
> Thanks,
>
> ? ? ? ?Ingo
>

2011-06-06 17:51:15

by Ingo Molnar

[permalink] [raw]
Subject: Re: [PATCH 2/3] x86-64: Clean up vsyscall emulation and remove fixed-address ret


* Andrew Lutomirski <[email protected]> wrote:

> >> ?arch/x86/include/asm/vsyscall.h ? | ? 10 ++++-
> >> ?arch/x86/kernel/vsyscall_64.c ? ? | ? 78 +++++++++++--------------------------
> >> ?arch/x86/kernel/vsyscall_emu_64.S | ? 17 +--------
> >> ?3 files changed, 32 insertions(+), 73 deletions(-)
> >
> > Ok, this makes the whole series a lot more palatable!
> >
> > Can i propagate the rename suggested by hpa into patch #3:
> >
> > ?CONFIG_COMPAT_VSYSCALL => CONFIG_LEGACY_VSYSCALL
>
> Certainly! CONFIG_LEGACY_VTIME might be even better, but I'm fine
> with any renaming you'd like to do.

Good point, i changed it to CONFIG_LEGACY_VTIME - this correctly
implies that it's only about vtime(), not about any other vsyscall.

Thanks,

Ingo

2011-06-06 21:40:46

by Andrew Lutomirski

[permalink] [raw]
Subject: [tip:x86/vdso] x86-64: Fix outdated comments in vsyscall_64.c

Commit-ID: 8d6316596441de42e3f30c8e536579f6292b46a0
Gitweb: http://git.kernel.org/tip/8d6316596441de42e3f30c8e536579f6292b46a0
Author: Andy Lutomirski <[email protected]>
AuthorDate: Mon, 6 Jun 2011 13:27:23 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Jun 2011 19:47:20 +0200

x86-64: Fix outdated comments in vsyscall_64.c

Signed-off-by: Andy Lutomirski <[email protected]>
Cc: [email protected]
Cc: Mikael Pettersson <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: http://lkml.kernel.org/r/ab1699caaea6c7863fb74abc2f5718353680070c.1307380481.git.luto@mit.edu
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/kernel/vsyscall_64.c | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 27d49b7..0b50b3c 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -11,10 +11,11 @@
* vsyscalls. One vsyscall can reserve more than 1 slot to avoid
* jumping out of line if necessary. We cannot add more with this
* mechanism because older kernels won't return -ENOSYS.
- * If we want more than four we need a vDSO.
*
- * Note: the concept clashes with user mode linux. If you use UML and
- * want per guest time just set the kernel.vsyscall64 sysctl to 0.
+ * This mechanism is deprecated in favor of the vDSO.
+ *
+ * Note: the concept clashes with user mode linux. UML users should
+ * use the vDSO.
*/

/* Disable profiling for userspace code: */

2011-06-06 21:41:13

by Andrew Lutomirski

[permalink] [raw]
Subject: [tip:x86/vdso] x86-64: Clean up vsyscall emulation and remove fixed-address ret

Commit-ID: 7dc0452808b75615d1dc4d5160b26aaabc6a7300
Gitweb: http://git.kernel.org/tip/7dc0452808b75615d1dc4d5160b26aaabc6a7300
Author: Andy Lutomirski <[email protected]>
AuthorDate: Mon, 6 Jun 2011 13:27:24 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Jun 2011 19:47:20 +0200

x86-64: Clean up vsyscall emulation and remove fixed-address ret

Remove the fixed-address ret in the vsyscall emulation stubs.
As a result, int 0xcc no longer works if executed from user
address space (which is OK -- nothing sensible should do that).

Removing support for int 0xcc in user address space cleans up
the code considerably.

Signed-off-by: Andy Lutomirski <[email protected]>
Cc: [email protected]
Cc: Mikael Pettersson <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: http://lkml.kernel.org/r/bd468a0e02befab146667fab5de57df7332d54d9.1307380481.git.luto@mit.edu
Signed-off-by: Ingo Molnar <[email protected]>
---
arch/x86/include/asm/vsyscall.h | 10 ++++-
arch/x86/kernel/vsyscall_64.c | 78 +++++++++++--------------------------
arch/x86/kernel/vsyscall_emu_64.S | 17 +--------
3 files changed, 32 insertions(+), 73 deletions(-)

diff --git a/arch/x86/include/asm/vsyscall.h b/arch/x86/include/asm/vsyscall.h
index 293ae08..bb710cb 100644
--- a/arch/x86/include/asm/vsyscall.h
+++ b/arch/x86/include/asm/vsyscall.h
@@ -32,9 +32,15 @@ extern struct timezone sys_tz;
extern void map_vsyscall(void);

/* Emulation */
-static inline bool in_vsyscall_page(unsigned long addr)
+
+static inline bool is_vsyscall_entry(unsigned long addr)
+{
+ return (addr & ~0xC00UL) == VSYSCALL_START;
+}
+
+static inline int vsyscall_entry_nr(unsigned long addr)
{
- return (addr & ~(PAGE_SIZE - 1)) == VSYSCALL_START;
+ return (addr & 0xC00UL) >> 10;
}

#endif /* __KERNEL__ */
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 0b50b3c..04540f7 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -96,25 +96,10 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,

tsk = current;

- printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx",
+ printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
level, tsk->comm, task_pid_nr(tsk),
message,
regs->ip - 2, regs->sp, regs->ax, regs->si, regs->di);
- if (!in_vsyscall_page(regs->ip - 2))
- print_vma_addr(" in ", regs->ip - 2);
- printk("\n");
-}
-
-/* al values for each vsyscall; see vsyscall_emu_64.S for why. */
-static u8 vsyscall_nr_to_al[] = {0xcc, 0xce, 0xf0};
-
-static int al_to_vsyscall_nr(u8 al)
-{
- int i;
- for (i = 0; i < ARRAY_SIZE(vsyscall_nr_to_al); i++)
- if (vsyscall_nr_to_al[i] == al)
- return i;
- return -1;
}

#ifdef CONFIG_COMPAT_VSYSCALLS
@@ -141,17 +126,12 @@ vtime(time_t *t)

#endif /* CONFIG_COMPAT_VSYSCALLS */

-/* If CONFIG_COMPAT_VSYSCALLS=y, then this is incorrect for vsyscall_nr == 1. */
-static inline unsigned long vsyscall_intcc_addr(int vsyscall_nr)
-{
- return VSYSCALL_START + 1024*vsyscall_nr + 2;
-}
-
void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
{
static DEFINE_RATELIMIT_STATE(rs, 3600 * HZ, 3);
struct task_struct *tsk;
const char *vsyscall_name;
+ unsigned long caller;
int vsyscall_nr;
long ret;

@@ -160,39 +140,26 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)

local_irq_enable();

- vsyscall_nr = al_to_vsyscall_nr(regs->ax & 0xff);
- if (vsyscall_nr < 0) {
+ /*
+ * x86-ism here: regs->ip points to the instruction after the int 0xcc,
+ * and int 0xcc is two bytes long.
+ */
+ if (!is_vsyscall_entry(regs->ip - 2)) {
warn_bad_vsyscall(KERN_WARNING, regs, "illegal int 0xcc "
"(exploit attempt?)");
goto sigsegv;
}
+ vsyscall_nr = vsyscall_entry_nr(regs->ip - 2);

- if (regs->ip - 2 != vsyscall_intcc_addr(vsyscall_nr)) {
- if (in_vsyscall_page(regs->ip - 2)) {
- /* This should not be possible. */
- warn_bad_vsyscall(KERN_WARNING, regs,
- "int 0xcc bogus magic "
- "(exploit attempt?)");
- goto sigsegv;
- } else {
- /*
- * We allow the call because tools like ThreadSpotter
- * might copy the int 0xcc instruction to user memory.
- * We make it annoying, though, to try to persuade
- * the authors to stop doing that...
- */
- warn_bad_vsyscall(KERN_WARNING, regs,
- "int 0xcc in user code "
- "(exploit attempt? legacy "
- "instrumented code?)");
- }
+ if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
+ warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack "
+ "(exploit attempt?)");
+ goto sigsegv;
}

tsk = current;
- if (seccomp_mode(&tsk->seccomp)) {
+ if (seccomp_mode(&tsk->seccomp))
do_exit(SIGKILL);
- goto out;
- }

switch (vsyscall_nr) {
case 0:
@@ -202,12 +169,8 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
(struct timezone __user *)regs->si);
break;

+#ifndef CONFIG_COMPAT_VSYSCALLS
case 1:
-#ifdef CONFIG_COMPAT_VSYSCALLS
- warn_bad_vsyscall(KERN_WARNING, regs, "bogus time() vsyscall "
- "emulation (exploit attempt?)");
- goto sigsegv;
-#else
vsyscall_name = "time";
ret = sys_time((time_t __user *)regs->di);
break;
@@ -221,6 +184,11 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
break;

default:
+ /*
+ * If we get here, then vsyscall_nr indicates that int 0xcc
+ * happened at an address in the vsyscall page that doesn't
+ * contain int 0xcc. That can't happen.
+ */
BUG();
}

@@ -240,9 +208,6 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
regs->ax = ret;

if (__ratelimit(&rs)) {
- unsigned long caller;
- if (get_user(caller, (unsigned long __user *)regs->sp))
- caller = 0; /* no need to crash on this fault. */
printk(KERN_INFO "%s[%d] emulated legacy vsyscall %s(); "
"upgrade your code to avoid a performance hit. "
"ip:%lx sp:%lx caller:%lx",
@@ -253,7 +218,10 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
printk("\n");
}

-out:
+ /* Emulate a ret instruction. */
+ regs->ip = caller;
+ regs->sp += 8;
+
local_irq_disable();
return;

diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index 2d53e26..5658d42 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -7,36 +7,21 @@
#include <linux/linkage.h>
#include <asm/irq_vectors.h>

-/*
- * These magic incantations are chosen so that they fault if entered anywhere
- * other than an instruction boundary. The movb instruction is two bytes, and
- * the int imm8 instruction is also two bytes, so the only misaligned places
- * to enter are the immediate values for the two instructions. 0xcc is int3
- * (always faults), 0xce is into (faults on x64-64, and 32-bit code can't get
- * here), and 0xf0 is lock (lock int is invalid).
- *
- * The unused parts of the page are filled with 0xcc by the linker script.
- */
+/* The unused parts of the page are filled with 0xcc by the linker script. */

.section .vsyscall_0, "a"
ENTRY(vsyscall_0)
- movb $0xcc, %al
int $VSYSCALL_EMU_VECTOR
- ret
END(vsyscall_0)

#ifndef CONFIG_COMPAT_VSYSCALLS
.section .vsyscall_1, "a"
ENTRY(vsyscall_1)
- movb $0xce, %al
int $VSYSCALL_EMU_VECTOR
- ret
END(vsyscall_1)
#endif

.section .vsyscall_2, "a"
ENTRY(vsyscall_2)
- movb $0xf0, %al
int $VSYSCALL_EMU_VECTOR
- ret
END(vsyscall_2)

2011-06-06 21:41:37

by Andrew Lutomirski

[permalink] [raw]
Subject: [tip:x86/vdso] x86-64: Rename COMPAT_VSYSCALLS to LEGACY_VTIME and clarify documentation

Commit-ID: feba7e97df8c463331071b79fba2164ead6aa14b
Gitweb: http://git.kernel.org/tip/feba7e97df8c463331071b79fba2164ead6aa14b
Author: Andy Lutomirski <[email protected]>
AuthorDate: Mon, 6 Jun 2011 13:27:25 -0400
Committer: Ingo Molnar <[email protected]>
CommitDate: Mon, 6 Jun 2011 19:49:31 +0200

x86-64: Rename COMPAT_VSYSCALLS to LEGACY_VTIME and clarify documentation

Rename COMPAT_VSYSCALLS to LEGACY_VTIME to make sure
it's not confused with the 32-bit compat facilities we
have on x86-64.

Also, in the discussions enough people were confused about
whether LEGACY_VTIME=n breaks binary compatibility that
we should make it harder to be confused. So make it more
clear what's going on.

[ The new text is slightly inaccurate in that LEGACY_VTIME
could make it slightly harder to exploit *kernel* bugs once the
kernel address layout is randomized, but me mentioning that in
the help text might just cause more confusion. ]

Signed-off-by: Andy Lutomirski <[email protected]>
Cc: [email protected]
Cc: Mikael Pettersson <[email protected]>
Cc: Linus Torvalds <[email protected]>
Link: http://lkml.kernel.org/r/4ad5860f9c9c79ecd303f345cf9c06f8859c44d4.1307380481.git.luto@mit.edu
Signed-off-by: Ingo Molnar <[email protected]>
---
Documentation/feature-removal-schedule.txt | 13 +++++++----
arch/x86/Kconfig | 29 +++++++++++++++------------
arch/x86/kernel/vsyscall_64.c | 6 ++--
arch/x86/kernel/vsyscall_emu_64.S | 2 +-
4 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index 4282ab2..7a7446b 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -601,12 +601,15 @@ Who: Laurent Pinchart <[email protected]>

----------------------------

-What: CONFIG_COMPAT_VSYSCALLS (x86_64)
-When: When glibc 2.14 or newer is ubitquitous. Perhaps mid-2012.
+What: CONFIG_LEGACY_VTIME (x86_64)
+When: When glibc 2.14 or newer is ubiquitous. Perhaps 2013.
Why: Having user-executable syscall invoking code at a fixed addresses makes
- it easier for attackers to exploit security holes.
- Turning off CONFIG_COMPAT_VSYSCALLS mostly removes the risk but will
- make the time() function slower on glibc versions 2.13 and below.
+ it easier for attackers to exploit security holes. Turning off
+ CONFIG_LEGACY_VTIME reduces the risk without breaking binary
+ compatibility but will make the time() function slightly slower on
+ glibc versions 2.13 and below.
+
+ We may flip the default setting to N before 2013.
Who: Andy Lutomirski <[email protected]>

----------------------------
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 30041d8..6746d35 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1646,26 +1646,29 @@ config COMPAT_VDSO

If unsure, say Y.

-config COMPAT_VSYSCALLS
+config LEGACY_VTIME
def_bool y
- prompt "Fixed address legacy vsyscalls"
+ prompt "Fast legacy sys_time() vsyscall"
depends on X86_64
---help---
- Legacy user code expects to be able to issue three syscalls
- by calling a fixed addresses. If you say N, then the kernel
- traps and emulates these calls. If you say Y, then there is
- actual executable code at a fixed address to implement time()
- efficiently.
+ Glibc 2.13 and older, statically linked binaries, and a few
+ other things use a legacy ABI to implement time().

- On a system with recent enough glibc (probably 2.14 or
- newer) and no static binaries, you can say N without a
- performance penalty to improve security: having no fixed
- address userspace-executable syscall invoking code makes
- it harder for both remote and local attackers to exploit
- security holes.
+ If you say N here, the kernel will emulate that interface in
+ order to make certain types of userspace bugs more difficult
+ to exploit. This will cause some legacy software to run
+ slightly more slowly.
+
+ If you say Y here, then the kernel will provide native code to
+ allow legacy programs to run without any performance impact.
+ This could make it easier to exploit certain types of
+ userspace bugs.

If unsure, say Y.

+ NOTE: disabling this option will not break any ABI; the kernel
+ will be fully compatible with all binaries either way.
+
config CMDLINE_BOOL
bool "Built-in kernel command line"
---help---
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index 04540f7..f56644e 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -102,7 +102,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
regs->ip - 2, regs->sp, regs->ax, regs->si, regs->di);
}

-#ifdef CONFIG_COMPAT_VSYSCALLS
+#ifdef CONFIG_LEGACY_VTIME

/* This will break when the xtime seconds get inaccurate, but that is
* unlikely */
@@ -124,7 +124,7 @@ vtime(time_t *t)
return result;
}

-#endif /* CONFIG_COMPAT_VSYSCALLS */
+#endif /* CONFIG_LEGACY_VTIME */

void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
{
@@ -169,7 +169,7 @@ void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
(struct timezone __user *)regs->si);
break;

-#ifndef CONFIG_COMPAT_VSYSCALLS
+#ifndef CONFIG_LEGACY_VTIME
case 1:
vsyscall_name = "time";
ret = sys_time((time_t __user *)regs->di);
diff --git a/arch/x86/kernel/vsyscall_emu_64.S b/arch/x86/kernel/vsyscall_emu_64.S
index 5658d42..b192283 100644
--- a/arch/x86/kernel/vsyscall_emu_64.S
+++ b/arch/x86/kernel/vsyscall_emu_64.S
@@ -14,7 +14,7 @@ ENTRY(vsyscall_0)
int $VSYSCALL_EMU_VECTOR
END(vsyscall_0)

-#ifndef CONFIG_COMPAT_VSYSCALLS
+#ifndef CONFIG_LEGACY_VTIME
.section .vsyscall_1, "a"
ENTRY(vsyscall_1)
int $VSYSCALL_EMU_VECTOR