2024-02-10 09:18:55

by Kees Cook

[permalink] [raw]
Subject: [PATCH] x86/vdso: Move vDSO to mmap region

From: Daniel Micay <[email protected]>

The vDSO (and its initial randomization) was introduced in commit
2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu"),
but had very low entropy. The entropy was improved in commit
394f56fe4801 ("x86_64, vdso: Fix the vdso address randomization algorithm"),
but there is still improvement to be made.

On principle there should not be executable code at a low entropy offset
from the stack, since the stack and executable code having separate
randomization is part of what makes ASLR stronger.

Remove the only executable code near the stack region and give the vDSO
the same randomized base as other mmap mappings including the linker
and other shared objects. This results in higher entropy being provided
and there's little to no advantage in separating this from the existing
executable code there. This is already how other architectures like
arm64 handle the vDSO.

As an side, while it's sensible for userspace to reserve the initial
mmap base as a region for executable code with a random gap for other
mmap allocations, along with providing randomization within that region,
there isn't much the kernel can do to help due to how dynamic linkers
load the shared objects.

This was extracted from the PaX RANDMMAP feature.

Closes: https://github.com/KSPP/linux/issues/280
Cc: Andy Lutomirski <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: [email protected]
Cc: "H. Peter Anvin" <[email protected]>
Cc: Eric Biederman <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Nikolay Borisov <[email protected]>
Cc: "Chang S. Bae" <[email protected]>
Cc: Igor Zhbanov <[email protected]>
Cc: Rick Edgecombe <[email protected]>
Cc: Randy Dunlap <[email protected]>
Cc: [email protected]
Signed-off-by: Daniel Micay <[email protected]>
[kees: updated commit log with historical details and other tweaks]
Signed-off-by: Kees Cook <[email protected]>
---
arch/x86/entry/vdso/vma.c | 57 ++----------------------------------
arch/x86/include/asm/elf.h | 1 -
arch/x86/kernel/sys_x86_64.c | 7 -----
3 files changed, 2 insertions(+), 63 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 7645730dc228..6d83ceb7f1ba 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -274,59 +274,6 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr)
return ret;
}

-#ifdef CONFIG_X86_64
-/*
- * Put the vdso above the (randomized) stack with another randomized
- * offset. This way there is no hole in the middle of address space.
- * To save memory make sure it is still in the same PTE as the stack
- * top. This doesn't give that many random bits.
- *
- * Note that this algorithm is imperfect: the distribution of the vdso
- * start address within a PMD is biased toward the end.
- *
- * Only used for the 64-bit and x32 vdsos.
- */
-static unsigned long vdso_addr(unsigned long start, unsigned len)
-{
- unsigned long addr, end;
- unsigned offset;
-
- /*
- * Round up the start address. It can start out unaligned as a result
- * of stack start randomization.
- */
- start = PAGE_ALIGN(start);
-
- /* Round the lowest possible end address up to a PMD boundary. */
- end = (start + len + PMD_SIZE - 1) & PMD_MASK;
- if (end >= DEFAULT_MAP_WINDOW)
- end = DEFAULT_MAP_WINDOW;
- end -= len;
-
- if (end > start) {
- offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
- addr = start + (offset << PAGE_SHIFT);
- } else {
- addr = start;
- }
-
- /*
- * Forcibly align the final address in case we have a hardware
- * issue that requires alignment for performance reasons.
- */
- addr = align_vdso_addr(addr);
-
- return addr;
-}
-
-static int map_vdso_randomized(const struct vdso_image *image)
-{
- unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start);
-
- return map_vdso(image, addr);
-}
-#endif
-
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
struct mm_struct *mm = current->mm;
@@ -369,7 +316,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (!vdso64_enabled)
return 0;

- return map_vdso_randomized(&vdso_image_64);
+ return map_vdso(&vdso_image_64, 0);
}

#ifdef CONFIG_COMPAT
@@ -380,7 +327,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
if (x32) {
if (!vdso64_enabled)
return 0;
- return map_vdso_randomized(&vdso_image_x32);
+ return map_vdso(&vdso_image_x32, 0);
}
#endif
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1e16bd5ac781..1fb83d47711f 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -392,5 +392,4 @@ struct va_alignment {
} ____cacheline_aligned;

extern struct va_alignment va_align;
-extern unsigned long align_vdso_addr(unsigned long);
#endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index c783aeb37dce..cb9fa1d5c66f 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -52,13 +52,6 @@ static unsigned long get_align_bits(void)
return va_align.bits & get_align_mask();
}

-unsigned long align_vdso_addr(unsigned long addr)
-{
- unsigned long align_mask = get_align_mask();
- addr = (addr + align_mask) & ~align_mask;
- return addr | get_align_bits();
-}
-
static int __init control_va_addr_alignment(char *str)
{
/* guard against enabling this on other CPU families */
--
2.34.1



2024-02-17 05:31:13

by Kees Cook

[permalink] [raw]
Subject: Re: [PATCH] x86/vdso: Move vDSO to mmap region

On Sat, Feb 10, 2024 at 01:18:35AM -0800, Kees Cook wrote:
> The vDSO (and its initial randomization) was introduced in commit
> 2aae950b21e4 ("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu"),
> but had very low entropy. The entropy was improved in commit
> 394f56fe4801 ("x86_64, vdso: Fix the vdso address randomization algorithm"),
> but there is still improvement to be made.
>
> On principle there should not be executable code at a low entropy offset
> from the stack, since the stack and executable code having separate
> randomization is part of what makes ASLR stronger.
>
> Remove the only executable code near the stack region and give the vDSO
> the same randomized base as other mmap mappings including the linker
> and other shared objects. This results in higher entropy being provided
> and there's little to no advantage in separating this from the existing
> executable code there. This is already how other architectures like
> arm64 handle the vDSO.

Thread ping. Anyone have thoughts on this? I can carry it in -next to
see if anything melts...

--
Kees Cook

Subject: [tip: x86/core] x86/vdso: Move vDSO to mmap region

The following commit has been merged into the x86/core branch of tip:

Commit-ID: 3c6539b4c177695aaa77893c4ce91d21dea7bb3d
Gitweb: https://git.kernel.org/tip/3c6539b4c177695aaa77893c4ce91d21dea7bb3d
Author: Daniel Micay <[email protected]>
AuthorDate: Sat, 10 Feb 2024 01:18:35 -08:00
Committer: Thomas Gleixner <[email protected]>
CommitterDate: Tue, 27 Feb 2024 00:23:55 +01:00

x86/vdso: Move vDSO to mmap region

The vDSO (and its initial randomization) was introduced in commit 2aae950b21e4
("x86_64: Add vDSO for x86-64 with gettimeofday/clock_gettime/getcpu"), but
had very low entropy. The entropy was improved in commit 394f56fe4801
("x86_64, vdso: Fix the vdso address randomization algorithm"), but there
is still improvement to be made.

In principle there should not be executable code at a low entropy offset
from the stack, since the stack and executable code having separate
randomization is part of what makes ASLR stronger.

Remove the only executable code near the stack region and give the vDSO
the same randomized base as other mmap mappings including the linker
and other shared objects. This results in higher entropy being provided
and there's little to no advantage in separating this from the existing
executable code there. This is already how other architectures like
arm64 handle the vDSO.

As an side, while it's sensible for userspace to reserve the initial mmap
base as a region for executable code with a random gap for other mmap
allocations, along with providing randomization within that region, there
isn't much the kernel can do to help due to how dynamic linkers load the
shared objects.

This was extracted from the PaX RANDMMAP feature.

[kees: updated commit log with historical details and other tweaks]

Signed-off-by: Daniel Micay <[email protected]>
Signed-off-by: Kees Cook <[email protected]>
Signed-off-by: Thomas Gleixner <[email protected]>
Closes: https://github.com/KSPP/linux/issues/280
Link: https://lore.kernel.org/r/[email protected]
---
arch/x86/entry/vdso/vma.c | 57 +----------------------------------
arch/x86/include/asm/elf.h | 1 +-
arch/x86/kernel/sys_x86_64.c | 7 +----
3 files changed, 2 insertions(+), 63 deletions(-)

diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c
index 7645730..6d83ceb 100644
--- a/arch/x86/entry/vdso/vma.c
+++ b/arch/x86/entry/vdso/vma.c
@@ -274,59 +274,6 @@ up_fail:
return ret;
}

-#ifdef CONFIG_X86_64
-/*
- * Put the vdso above the (randomized) stack with another randomized
- * offset. This way there is no hole in the middle of address space.
- * To save memory make sure it is still in the same PTE as the stack
- * top. This doesn't give that many random bits.
- *
- * Note that this algorithm is imperfect: the distribution of the vdso
- * start address within a PMD is biased toward the end.
- *
- * Only used for the 64-bit and x32 vdsos.
- */
-static unsigned long vdso_addr(unsigned long start, unsigned len)
-{
- unsigned long addr, end;
- unsigned offset;
-
- /*
- * Round up the start address. It can start out unaligned as a result
- * of stack start randomization.
- */
- start = PAGE_ALIGN(start);
-
- /* Round the lowest possible end address up to a PMD boundary. */
- end = (start + len + PMD_SIZE - 1) & PMD_MASK;
- if (end >= DEFAULT_MAP_WINDOW)
- end = DEFAULT_MAP_WINDOW;
- end -= len;
-
- if (end > start) {
- offset = get_random_u32_below(((end - start) >> PAGE_SHIFT) + 1);
- addr = start + (offset << PAGE_SHIFT);
- } else {
- addr = start;
- }
-
- /*
- * Forcibly align the final address in case we have a hardware
- * issue that requires alignment for performance reasons.
- */
- addr = align_vdso_addr(addr);
-
- return addr;
-}
-
-static int map_vdso_randomized(const struct vdso_image *image)
-{
- unsigned long addr = vdso_addr(current->mm->start_stack, image->size-image->sym_vvar_start);
-
- return map_vdso(image, addr);
-}
-#endif
-
int map_vdso_once(const struct vdso_image *image, unsigned long addr)
{
struct mm_struct *mm = current->mm;
@@ -369,7 +316,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
if (!vdso64_enabled)
return 0;

- return map_vdso_randomized(&vdso_image_64);
+ return map_vdso(&vdso_image_64, 0);
}

#ifdef CONFIG_COMPAT
@@ -380,7 +327,7 @@ int compat_arch_setup_additional_pages(struct linux_binprm *bprm,
if (x32) {
if (!vdso64_enabled)
return 0;
- return map_vdso_randomized(&vdso_image_x32);
+ return map_vdso(&vdso_image_x32, 0);
}
#endif
#ifdef CONFIG_IA32_EMULATION
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index 1e16bd5..1fb83d4 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -392,5 +392,4 @@ struct va_alignment {
} ____cacheline_aligned;

extern struct va_alignment va_align;
-extern unsigned long align_vdso_addr(unsigned long);
#endif /* _ASM_X86_ELF_H */
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
index c783aeb..cb9fa1d 100644
--- a/arch/x86/kernel/sys_x86_64.c
+++ b/arch/x86/kernel/sys_x86_64.c
@@ -52,13 +52,6 @@ static unsigned long get_align_bits(void)
return va_align.bits & get_align_mask();
}

-unsigned long align_vdso_addr(unsigned long addr)
-{
- unsigned long align_mask = get_align_mask();
- addr = (addr + align_mask) & ~align_mask;
- return addr | get_align_bits();
-}
-
static int __init control_va_addr_alignment(char *str)
{
/* guard against enabling this on other CPU families */