2007-01-14 05:31:47

by Roland McGrath

Subject: [PATCH 1/11] Fix CONFIG_COMPAT_VDSO


I wouldn't mind if CONFIG_COMPAT_VDSO went away entirely.
But if it's there, it should work properly. Currently
it's quite haphazard: both the real vma and the fixmap are
mapped, both are put in the two different AT_* slots,
sysenter returns to the vma address rather than the
fixmap address, and core dumps are yet another story.

This patch makes CONFIG_COMPAT_VDSO disable the real vma
and use the fixmap area consistently. This makes it
actually compatible with what the old vdso implementation did.

Signed-off-by: Roland McGrath <[email protected]>
---
arch/i386/kernel/entry.S | 4 ++++
arch/i386/kernel/sysenter.c | 2 ++
include/asm-i386/elf.h | 7 +++----
include/asm-i386/fixmap.h | 2 ++
include/asm-i386/page.h | 2 ++
5 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 06461b8..5e47683 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -302,12 +302,16 @@ sysenter_past_esp:
pushl $(__USER_CS)
CFI_ADJUST_CFA_OFFSET 4
/*CFI_REL_OFFSET cs, 0*/
+#ifndef CONFIG_COMPAT_VDSO
/*
* Push current_thread_info()->sysenter_return to the stack.
* A tiny bit of offset fixup is necessary - 4*4 means the 4 words
* pushed above; +8 corresponds to copy_thread's esp0 setting.
*/
pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp)
+#else
+ pushl $SYSENTER_RETURN
+#endif
CFI_ADJUST_CFA_OFFSET 4
CFI_REL_OFFSET eip, 0

diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 7de9117..454d12d 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -100,6 +100,7 @@ int __init sysenter_setup(void)
return 0;
}

+#ifndef CONFIG_COMPAT_VDSO
static struct page *syscall_nopage(struct vm_area_struct *vma,
unsigned long adr, int *type)
{
@@ -187,3 +188,4 @@ int in_gate_area_no_task(unsigned long a
{
return 0;
}
+#endif
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 45d21a0..0515d61 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -143,11 +143,8 @@ extern int dump_task_extended_fpu (struc
# define VDSO_PRELINK 0
#endif

-#define VDSO_COMPAT_SYM(x) \
- (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK)
-
#define VDSO_SYM(x) \
- (VDSO_BASE + (unsigned long)(x) - VDSO_PRELINK)
+ (VDSO_COMPAT_BASE + (unsigned long)(x) - VDSO_PRELINK)

#define VDSO_HIGH_EHDR ((const struct elfhdr *) VDSO_HIGH_BASE)
#define VDSO_EHDR ((const struct elfhdr *) VDSO_COMPAT_BASE)
@@ -156,10 +153,12 @@ extern void __kernel_vsyscall;

#define VDSO_ENTRY VDSO_SYM(&__kernel_vsyscall)

+#ifndef CONFIG_COMPAT_VDSO
#define ARCH_HAS_SETUP_ADDITIONAL_PAGES
struct linux_binprm;
extern int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack);
+#endif

extern unsigned int vdso_enabled;

diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h
index 02428cb..3e9f610 100644
--- a/include/asm-i386/fixmap.h
+++ b/include/asm-i386/fixmap.h
@@ -23,6 +23,8 @@
extern unsigned long __FIXADDR_TOP;
#else
#define __FIXADDR_TOP 0xfffff000
+#define FIXADDR_USER_START __fix_to_virt(FIX_VDSO)
+#define FIXADDR_USER_END __fix_to_virt(FIX_VDSO - 1)
#endif

#ifndef __ASSEMBLY__
diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h
index fd3f64a..7b19f45 100644
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -143,7 +143,9 @@ extern int page_is_ram(unsigned long pag
#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>

+#ifndef CONFIG_COMPAT_VDSO
#define __HAVE_ARCH_GATE_AREA 1
+#endif
#endif /* __KERNEL__ */

#endif /* _I386_PAGE_H */
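
A note on the pushl operand in the sysenter hunk above: the arithmetic
follows from the usual i386 stack layout, where thread_info sits at the
bottom of the THREAD_SIZE-aligned kernel stack and copy_thread sets esp0
eight bytes below the top (the "+8" in the patch comment). A sketch of the
derivation in illustrative C, under exactly those assumptions:

	/*
	 * After sysenter_past_esp has pushed ss, esp, eflags and cs
	 * (4 words), recover &current_thread_info()->sysenter_return
	 * from %esp alone:
	 */
	unsigned long sysenter_return_slot(unsigned long esp)
	{
		unsigned long esp0 = esp + 4*4;		/* undo the 4 pushes */
		unsigned long top  = esp0 + 8;		/* copy_thread: esp0 = top - 8 */
		unsigned long ti   = top - THREAD_SIZE;	/* thread_info at stack base */

		return ti + TI_sysenter_return;
		/* == esp + (TI_sysenter_return - THREAD_SIZE + 8 + 4*4) */
	}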


2007-01-14 05:33:11

by Roland McGrath

Subject: [PATCH 2/11] Fix gate_vma.vm_flags


This patch fixes the initialization of gate_vma.vm_flags and
gate_vma.vm_page_prot to reflect reality. This makes the "[vdso]" line in
/proc/PID/maps correctly show r-xp instead of ---p, when gate_vma is used
(CONFIG_COMPAT_VDSO on i386).

Signed-off-by: Roland McGrath <[email protected]>
---
mm/memory.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index af227d2..5beb4b8 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2606,8 +2606,8 @@ static int __init gate_vma_init(void)
gate_vma.vm_mm = NULL;
gate_vma.vm_start = FIXADDR_USER_START;
gate_vma.vm_end = FIXADDR_USER_END;
- gate_vma.vm_page_prot = PAGE_READONLY;
- gate_vma.vm_flags = 0;
+ gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
+ gate_vma.vm_page_prot = __P101;
return 0;
}
__initcall(gate_vma_init);
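
The __P101 above is not magic: protection_map[] is indexed by the low
vm_flags permission bits, so entry 5 (binary 101: read and exec, no write)
is the private r-x protection matching the flags now being set. An
illustrative lookup, assuming the standard VM_READ=0x1, VM_WRITE=0x2,
VM_EXEC=0x4 encoding:

	/* Sketch: a vm_page_prot consistent with r-x vm_flags, as the fix requires. */
	pgprot_t prot = protection_map[(VM_READ | VM_EXEC) & 0x7];	/* == __P101 */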

2007-01-14 05:33:55

by Roland McGrath

Subject: [PATCH 3/11] Add VM_ALWAYSDUMP


This patch adds the VM_ALWAYSDUMP flag for vm_flags in vm_area_struct.
This provides a clean explicit way to have a vma always included in core
dumps, as is needed for vDSOs.

Signed-off-by: Roland McGrath <[email protected]>
---
fs/binfmt_elf.c | 4 ++++
include/linux/mm.h | 1 +
2 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7cb2872..6fec8bf 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1178,6 +1178,10 @@ static int dump_seek(struct file *file,
*/
static int maydump(struct vm_area_struct *vma)
{
+ /* The vma can be set up to tell us the answer directly. */
+ if (vma->vm_flags & VM_ALWAYSDUMP)
+ return 1;
+
/* Do not dump I/O mapped devices or special mappings */
if (vma->vm_flags & (VM_IO | VM_RESERVED))
return 0;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7691223..2d2c08d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -168,6 +168,7 @@ extern unsigned int kobjsize(const void
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
+#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */

#ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */
#define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
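
Usage is then a one-line opt-in wherever an arch creates a special-purpose
vma, as the following patches in this series do (sketch):

	/* Opt the vma into core dumps unconditionally; maydump() checks this first. */
	vma->vm_flags |= VM_ALWAYSDUMP;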

2007-01-14 05:34:34

by Roland McGrath

Subject: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP


This patch fixes core dumps to include the vDSO vma, which is currently left out.
It removes the special-case core writing macros, which were not doing the
right thing for the vDSO vma anyway. Instead, it uses VM_ALWAYSDUMP in the
vma; there is no need for the fixmap page to be installed. It handles the
CONFIG_COMPAT_VDSO case by making elf_core_dump use the fake vma from
get_gate_vma after the real vmas, in the same way the /proc/PID/maps code does.

This changes core dumps so they no longer include the non-PT_LOAD phdrs
from the vDSO. I made the change to add them in the first place, but it
turned out that nothing ever wanted them there since the advent of NT_AUXV.
It's cleaner to leave them out, and just let the phdrs inside the vDSO
image speak for themselves.

Signed-off-by: Roland McGrath <[email protected]>
---
arch/i386/kernel/sysenter.c | 12 ++++++----
fs/binfmt_elf.c | 12 ++++++++--
include/asm-i386/elf.h | 44 -------------------------------------------
mm/memory.c | 7 ++++++
4 files changed, 23 insertions(+), 52 deletions(-)

diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 454d12d..5da7442 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -79,11 +79,6 @@ int __init sysenter_setup(void)
#ifdef CONFIG_COMPAT_VDSO
__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO));
-#else
- /*
- * In the non-compat case the ELF coredumping code needs the fixmap:
- */
- __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_KERNEL_RO);
#endif

if (!boot_cpu_has(X86_FEATURE_SEP)) {
@@ -147,6 +142,13 @@ int arch_setup_additional_pages(struct l
vma->vm_end = addr + PAGE_SIZE;
/* MAYWRITE to allow gdb to COW and set breakpoints */
vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+ /*
+ * Make sure the vDSO gets into every core dump.
+ * Dumping its contents makes post-mortem fully interpretable later
+ * without matching up the same kernel and hardware config to see
+ * what PC values meant.
+ */
+ vma->vm_flags |= VM_ALWAYSDUMP;
vma->vm_flags |= mm->def_flags;
vma->vm_page_prot = protection_map[vma->vm_flags & 7];
vma->vm_ops = &syscall_vm_ops;
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 6fec8bf..4ee7cf5 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1443,7 +1443,7 @@ static int elf_core_dump(long signr, str
int segs;
size_t size = 0;
int i;
- struct vm_area_struct *vma;
+ struct vm_area_struct *vma, *gate_vma;
struct elfhdr *elf = NULL;
loff_t offset = 0, dataoff, foffset;
unsigned long limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
@@ -1529,6 +1529,10 @@ static int elf_core_dump(long signr, str
segs += ELF_CORE_EXTRA_PHDRS;
#endif

+ gate_vma = get_gate_vma(current);
+ if (gate_vma != NULL)
+ segs++;
+
/* Set up header */
fill_elf_header(elf, segs + 1); /* including notes section */

@@ -1596,7 +1600,8 @@ static int elf_core_dump(long signr, str
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

/* Write program headers for segments dump */
- for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (vma = current->mm->mmap; vma != NULL;
+ vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
struct elf_phdr phdr;
size_t sz;

@@ -1645,7 +1650,8 @@ static int elf_core_dump(long signr, str
/* Align to page */
DUMP_SEEK(dataoff - foffset);

- for (vma = current->mm->mmap; vma != NULL; vma = vma->vm_next) {
+ for (vma = current->mm->mmap; vma != NULL;
+ vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
unsigned long addr;

if (!maydump(vma))
diff --git a/include/asm-i386/elf.h b/include/asm-i386/elf.h
index 0515d61..369035d 100644
--- a/include/asm-i386/elf.h
+++ b/include/asm-i386/elf.h
@@ -168,50 +168,6 @@ do if (vdso_enabled) { \
NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_COMPAT_BASE); \
} while (0)

-/*
- * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
- * extra segments containing the vsyscall DSO contents. Dumping its
- * contents makes post-mortem fully interpretable later without matching up
- * the same kernel and hardware config to see what PC values meant.
- * Dumping its extra ELF program headers includes all the other information
- * a debugger needs to easily find how the vsyscall DSO was being used.
- */
-#define ELF_CORE_EXTRA_PHDRS (VDSO_HIGH_EHDR->e_phnum)
-#define ELF_CORE_WRITE_EXTRA_PHDRS \
-do { \
- const struct elf_phdr *const vsyscall_phdrs = \
- (const struct elf_phdr *) (VDSO_HIGH_BASE \
- + VDSO_HIGH_EHDR->e_phoff); \
- int i; \
- Elf32_Off ofs = 0; \
- for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) { \
- struct elf_phdr phdr = vsyscall_phdrs[i]; \
- if (phdr.p_type == PT_LOAD) { \
- BUG_ON(ofs != 0); \
- ofs = phdr.p_offset = offset; \
- phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \
- phdr.p_filesz = phdr.p_memsz; \
- offset += phdr.p_filesz; \
- } \
- else \
- phdr.p_offset += ofs; \
- phdr.p_paddr = 0; /* match other core phdrs */ \
- DUMP_WRITE(&phdr, sizeof(phdr)); \
- } \
-} while (0)
-#define ELF_CORE_WRITE_EXTRA_DATA \
-do { \
- const struct elf_phdr *const vsyscall_phdrs = \
- (const struct elf_phdr *) (VDSO_HIGH_BASE \
- + VDSO_HIGH_EHDR->e_phoff); \
- int i; \
- for (i = 0; i < VDSO_HIGH_EHDR->e_phnum; ++i) { \
- if (vsyscall_phdrs[i].p_type == PT_LOAD) \
- DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \
- PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \
- } \
-} while (0)
-
#endif

#endif
diff --git a/mm/memory.c b/mm/memory.c
index 5beb4b8..ef09f0a 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2608,6 +2608,13 @@ static int __init gate_vma_init(void)
gate_vma.vm_end = FIXADDR_USER_END;
gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC;
gate_vma.vm_page_prot = __P101;
+ /*
+ * Make sure the vDSO gets into every core dump.
+ * Dumping its contents makes post-mortem fully interpretable later
+ * without matching up the same kernel and hardware config to see
+ * what PC values meant.
+ */
+ gate_vma.vm_flags |= VM_ALWAYSDUMP;
return 0;
}
__initcall(gate_vma_init);

2007-01-14 05:35:33

by Roland McGrath

Subject: [PATCH 5/11] x86_64 ia32 vDSO: use VM_ALWAYSDUMP


This patch fixes ia32 core dumps on x86_64 to include just one phdr for the
vDSO vma. Currently the dump is written in a confused format, with two phdrs
for the same address, one without contents and one with. This patch removes the
special-case core writing macros for the ia32 vDSO. Instead, it uses
VM_ALWAYSDUMP in the vma. This changes core dumps so they no longer
include the non-PT_LOAD phdrs from the vDSO, consistent with fixed native
i386 core dumps.

Signed-off-by: Roland McGrath <[email protected]>
---
arch/x86_64/ia32/ia32_binfmt.c | 49 ----------------------------------------
arch/x86_64/ia32/syscall32.c | 7 +++++
2 files changed, 7 insertions(+), 49 deletions(-)

diff --git a/arch/x86_64/ia32/ia32_binfmt.c b/arch/x86_64/ia32/ia32_binfmt.c
index 543ef4f..5ce0bd4 100644
--- a/arch/x86_64/ia32/ia32_binfmt.c
+++ b/arch/x86_64/ia32/ia32_binfmt.c
@@ -64,55 +64,6 @@ typedef unsigned int elf_greg_t;
#define ELF_NGREG (sizeof (struct user_regs_struct32) / sizeof(elf_greg_t))
typedef elf_greg_t elf_gregset_t[ELF_NGREG];

-/*
- * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
- * extra segments containing the vsyscall DSO contents. Dumping its
- * contents makes post-mortem fully interpretable later without matching up
- * the same kernel and hardware config to see what PC values meant.
- * Dumping its extra ELF program headers includes all the other information
- * a debugger needs to easily find how the vsyscall DSO was being used.
- */
-#define ELF_CORE_EXTRA_PHDRS (find_vma(current->mm, VSYSCALL32_BASE) ? \
- (VSYSCALL32_EHDR->e_phnum) : 0)
-#define ELF_CORE_WRITE_EXTRA_PHDRS \
-do { \
- if (find_vma(current->mm, VSYSCALL32_BASE)) { \
- const struct elf32_phdr *const vsyscall_phdrs = \
- (const struct elf32_phdr *) (VSYSCALL32_BASE \
- + VSYSCALL32_EHDR->e_phoff);\
- int i; \
- Elf32_Off ofs = 0; \
- for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) { \
- struct elf32_phdr phdr = vsyscall_phdrs[i]; \
- if (phdr.p_type == PT_LOAD) { \
- BUG_ON(ofs != 0); \
- ofs = phdr.p_offset = offset; \
- phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); \
- phdr.p_filesz = phdr.p_memsz; \
- offset += phdr.p_filesz; \
- } \
- else \
- phdr.p_offset += ofs; \
- phdr.p_paddr = 0; /* match other core phdrs */ \
- DUMP_WRITE(&phdr, sizeof(phdr)); \
- } \
- } \
-} while (0)
-#define ELF_CORE_WRITE_EXTRA_DATA \
-do { \
- if (find_vma(current->mm, VSYSCALL32_BASE)) { \
- const struct elf32_phdr *const vsyscall_phdrs = \
- (const struct elf32_phdr *) (VSYSCALL32_BASE \
- + VSYSCALL32_EHDR->e_phoff); \
- int i; \
- for (i = 0; i < VSYSCALL32_EHDR->e_phnum; ++i) { \
- if (vsyscall_phdrs[i].p_type == PT_LOAD) \
- DUMP_WRITE((void *) (u64) vsyscall_phdrs[i].p_vaddr,\
- PAGE_ALIGN(vsyscall_phdrs[i].p_memsz)); \
- } \
- } \
-} while (0)
-
struct elf_siginfo
{
int si_signo; /* signal number */
diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 3e5ed20..3ac9355 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -59,6 +59,13 @@ int syscall32_setup_pages(struct linux_b
vma->vm_end = VSYSCALL32_END;
/* MAYWRITE to allow gdb to COW and set breakpoints */
vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+ /*
+ * Make sure the vDSO gets into every core dump.
+ * Dumping its contents makes post-mortem fully interpretable later
+ * without matching up the same kernel and hardware config to see
+ * what PC values meant.
+ */
+ vma->vm_flags |= VM_ALWAYSDUMP;
vma->vm_flags |= mm->def_flags;
vma->vm_page_prot = protection_map[vma->vm_flags & 7];
vma->vm_ops = &syscall32_vm_ops;

2007-01-14 05:35:37

by Roland McGrath

Subject: [PATCH 6/11] powerpc vDSO: use VM_ALWAYSDUMP


This patch fixes core dumps to include the vDSO vma, which is currently left out.

Signed-off-by: Roland McGrath <[email protected]>
---
arch/powerpc/kernel/vdso.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index a4b28c7..ae0ede1 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -284,6 +284,13 @@ int arch_setup_additional_pages(struct l
* pages though
*/
vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
+ /*
+ * Make sure the vDSO gets into every core dump.
+ * Dumping its contents makes post-mortem fully interpretable later
+ * without matching up the same kernel and hardware config to see
+ * what PC values meant.
+ */
+ vma->vm_flags |= VM_ALWAYSDUMP;
vma->vm_flags |= mm->def_flags;
vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
vma->vm_ops = &vdso_vmops;

2007-01-14 05:36:36

by Roland McGrath

Subject: [PATCH 7/11] x86_64 ia32 vDSO: define arch_vma_name


This patch makes x86_64 define arch_vma_name for CONFIG_IA32_EMULATION.
This makes the ia32 vDSO mapping appear in /proc/PID/maps with "[vdso]"
for ia32 processes, as it does on native i386.

Signed-off-by: Roland McGrath <[email protected]>
---
arch/x86_64/ia32/syscall32.c | 8 ++++++++
1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 3ac9355..59f1fa1 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -82,6 +82,14 @@ int syscall32_setup_pages(struct linux_b
return 0;
}

+const char *arch_vma_name(struct vm_area_struct *vma)
+{
+ if (vma->vm_start == VSYSCALL32_BASE &&
+ vma->vm_mm && vma->vm_mm->task_size == IA32_PAGE_OFFSET)
+ return "[vdso]";
+ return NULL;
+}
+
static int __init init_syscall32(void)
{
syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);

2007-01-14 05:36:40

by Roland McGrath

Subject: [PATCH 8/11] Add install_special_mapping


This patch adds a utility function, install_special_mapping, for creating a
special vma backed by a fixed set of preallocated pages, such as for a vDSO.
This consolidates the nearly identical vDSO-mapping code that is currently
reimplemented in several architectures.

Signed-off-by: Roland McGrath <[email protected]>
---
include/linux/mm.h | 3 ++
mm/mmap.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 75 insertions(+), 0 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 2d2c08d..bb793a4 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1030,6 +1030,9 @@ extern struct vm_area_struct *copy_vma(s
unsigned long addr, unsigned long len, pgoff_t pgoff);
extern void exit_mmap(struct mm_struct *);
extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
+extern int install_special_mapping(struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long flags, struct page **pages);

extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);

diff --git a/mm/mmap.c b/mm/mmap.c
index 9717337..b540fb2 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2094,3 +2094,75 @@ int may_expand_vm(struct mm_struct *mm,
return 0;
return 1;
}
+
+
+static struct page *special_mapping_nopage(struct vm_area_struct *vma,
+ unsigned long address, int *type)
+{
+ struct page **pages;
+
+ BUG_ON(address < vma->vm_start || address >= vma->vm_end);
+
+ address -= vma->vm_start;
+ for (pages = vma->vm_private_data; address > 0 && *pages; ++pages)
+ address -= PAGE_SIZE;
+
+ if (*pages) {
+ struct page *page = *pages;
+ get_page(page);
+ return page;
+ }
+
+ return NOPAGE_SIGBUS;
+}
+
+/*
+ * Having a close hook prevents vma merging regardless of flags.
+ */
+static void special_mapping_close(struct vm_area_struct *vma)
+{
+}
+
+static struct vm_operations_struct special_mapping_vmops = {
+ .close = special_mapping_close,
+ .nopage = special_mapping_nopage,
+};
+
+/*
+ * Called with mm->mmap_sem held for writing.
+ * Insert a new vma covering the given region, with the given flags.
+ * Its pages are supplied by the given array of struct page *.
+ * The array can be shorter than len >> PAGE_SHIFT if it's null-terminated.
+ * The region past the last page supplied will always produce SIGBUS.
+ * The array pointer and the pages it points to are assumed to stay alive
+ * for as long as this mapping might exist.
+ */
+int install_special_mapping(struct mm_struct *mm,
+ unsigned long addr, unsigned long len,
+ unsigned long vm_flags, struct page **pages)
+{
+ struct vm_area_struct *vma;
+
+ vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+ if (unlikely(vma == NULL))
+ return -ENOMEM;
+
+ vma->vm_mm = mm;
+ vma->vm_start = addr;
+ vma->vm_end = addr + len;
+
+ vma->vm_flags = vm_flags | mm->def_flags;
+ vma->vm_page_prot = protection_map[vma->vm_flags & 7];
+
+ vma->vm_ops = &special_mapping_vmops;
+ vma->vm_private_data = pages;
+
+ if (unlikely(insert_vm_struct(mm, vma))) {
+ kmem_cache_free(vm_area_cachep, vma);
+ return -ENOMEM;
+ }
+
+ mm->total_vm += len >> PAGE_SHIFT;
+
+ return 0;
+}
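
To show the intended calling convention, here is a hypothetical arch setup
function (my_vdso_pages and MY_VDSO_BASE are made-up names for
illustration): the caller holds mmap_sem for writing and passes a
null-terminated page array that must outlive the mapping.

	/* One vDSO page plus the NULL terminator the nopage handler scans for. */
	static struct page *my_vdso_pages[2];

	int my_arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
	{
		struct mm_struct *mm = current->mm;
		int ret;

		down_write(&mm->mmap_sem);
		/* MAYWRITE so gdb can COW the page and set breakpoints. */
		ret = install_special_mapping(mm, MY_VDSO_BASE, PAGE_SIZE,
					      VM_READ|VM_EXEC|
					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
					      VM_ALWAYSDUMP,
					      my_vdso_pages);
		up_write(&mm->mmap_sem);
		return ret;
	}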

2007-01-14 05:37:05

by Roland McGrath

Subject: [PATCH 9/11] i386 vDSO: use install_special_mapping


This patch uses install_special_mapping for the i386 vDSO setup,
consolidating duplicated code.

Signed-off-by: Roland McGrath <[email protected]>
---
arch/i386/kernel/sysenter.c | 53 +++++++++----------------------------------
1 files changed, 11 insertions(+), 42 deletions(-)

diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c
index 5da7442..bc882a2 100644
--- a/arch/i386/kernel/sysenter.c
+++ b/arch/i386/kernel/sysenter.c
@@ -70,11 +70,12 @@ void enable_sep_cpu(void)
*/
extern const char vsyscall_int80_start, vsyscall_int80_end;
extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
-static void *syscall_page;
+static struct page *syscall_pages[1];

int __init sysenter_setup(void)
{
- syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+ void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+ syscall_pages[0] = virt_to_page(syscall_page);

#ifdef CONFIG_COMPAT_VDSO
__set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY);
@@ -96,31 +97,12 @@ int __init sysenter_setup(void)
}

#ifndef CONFIG_COMPAT_VDSO
-static struct page *syscall_nopage(struct vm_area_struct *vma,
- unsigned long adr, int *type)
-{
- struct page *p = virt_to_page(adr - vma->vm_start + syscall_page);
- get_page(p);
- return p;
-}
-
-/* Prevent VMA merging */
-static void syscall_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall_vm_ops = {
- .close = syscall_vma_close,
- .nopage = syscall_nopage,
-};
-
/* Defined in vsyscall-sysenter.S */
extern void SYSENTER_RETURN;

/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack)
{
- struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret;
@@ -132,38 +114,25 @@ int arch_setup_additional_pages(struct l
goto up_fail;
}

- vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
- if (!vma) {
- ret = -ENOMEM;
- goto up_fail;
- }
-
- vma->vm_start = addr;
- vma->vm_end = addr + PAGE_SIZE;
- /* MAYWRITE to allow gdb to COW and set breakpoints */
- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
/*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ *
* Make sure the vDSO gets into every core dump.
* Dumping its contents makes post-mortem fully interpretable later
* without matching up the same kernel and hardware config to see
* what PC values meant.
*/
- vma->vm_flags |= VM_ALWAYSDUMP;
- vma->vm_flags |= mm->def_flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 7];
- vma->vm_ops = &syscall_vm_ops;
- vma->vm_mm = mm;
-
- ret = insert_vm_struct(mm, vma);
- if (unlikely(ret)) {
- kmem_cache_free(vm_area_cachep, vma);
+ ret = install_special_mapping(mm, addr, PAGE_SIZE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_ALWAYSDUMP,
+ syscall_pages);
+ if (ret)
goto up_fail;
- }

current->mm->context.vdso = (void *)addr;
current_thread_info()->sysenter_return =
(void *)VDSO_SYM(&SYSENTER_RETURN);
- mm->total_vm++;
up_fail:
up_write(&mm->mmap_sem);
return ret;

2007-01-14 05:38:03

by Roland McGrath

Subject: [PATCH 10/11] x86_64 ia32 vDSO: use install_special_mapping


This patch uses install_special_mapping for the ia32 vDSO setup,
consolidating duplicated code.

Signed-off-by: Roland McGrath <[email protected]>
---
arch/x86_64/ia32/syscall32.c | 75 ++++++++++++------------------------------
include/asm-x86_64/proto.h | 1 -
2 files changed, 21 insertions(+), 55 deletions(-)

diff --git a/arch/x86_64/ia32/syscall32.c b/arch/x86_64/ia32/syscall32.c
index 59f1fa1..3939f10 100644
--- a/arch/x86_64/ia32/syscall32.c
+++ b/arch/x86_64/ia32/syscall32.c
@@ -18,68 +18,34 @@ extern unsigned char syscall32_syscall[]
extern unsigned char syscall32_sysenter[], syscall32_sysenter_end[];
extern int sysctl_vsyscall32;

-char *syscall32_page;
+static struct page *syscall32_pages[1];
static int use_sysenter = -1;

-static struct page *
-syscall32_nopage(struct vm_area_struct *vma, unsigned long adr, int *type)
-{
- struct page *p = virt_to_page(adr - vma->vm_start + syscall32_page);
- get_page(p);
- return p;
-}
-
-/* Prevent VMA merging */
-static void syscall32_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall32_vm_ops = {
- .close = syscall32_vma_close,
- .nopage = syscall32_nopage,
-};
-
struct linux_binprm;

/* Setup a VMA at program startup for the vsyscall page */
int syscall32_setup_pages(struct linux_binprm *bprm, int exstack)
{
- int npages = (VSYSCALL32_END - VSYSCALL32_BASE) >> PAGE_SHIFT;
- struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
int ret;

- vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
- if (!vma)
- return -ENOMEM;
-
- memset(vma, 0, sizeof(struct vm_area_struct));
- /* Could randomize here */
- vma->vm_start = VSYSCALL32_BASE;
- vma->vm_end = VSYSCALL32_END;
- /* MAYWRITE to allow gdb to COW and set breakpoints */
- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
+ down_write(&mm->mmap_sem);
/*
+ * MAYWRITE to allow gdb to COW and set breakpoints
+ *
* Make sure the vDSO gets into every core dump.
* Dumping its contents makes post-mortem fully interpretable later
* without matching up the same kernel and hardware config to see
* what PC values meant.
*/
- vma->vm_flags |= VM_ALWAYSDUMP;
- vma->vm_flags |= mm->def_flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 7];
- vma->vm_ops = &syscall32_vm_ops;
- vma->vm_mm = mm;
-
- down_write(&mm->mmap_sem);
- if ((ret = insert_vm_struct(mm, vma))) {
- up_write(&mm->mmap_sem);
- kmem_cache_free(vm_area_cachep, vma);
- return ret;
- }
- mm->total_vm += npages;
+ /* Could randomize here */
+ ret = install_special_mapping(mm, VSYSCALL32_BASE, PAGE_SIZE,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_ALWAYSDUMP,
+ syscall32_pages);
up_write(&mm->mmap_sem);
- return 0;
+ return ret;
}

const char *arch_vma_name(struct vm_area_struct *vma)
@@ -92,9 +58,10 @@ const char *arch_vma_name(struct vm_area

static int __init init_syscall32(void)
{
- syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
+ char *syscall32_page = (void *)get_zeroed_page(GFP_KERNEL);
if (!syscall32_page)
panic("Cannot allocate syscall32 page");
+ syscall32_pages[0] = virt_to_page(syscall32_page);
if (use_sysenter > 0) {
memcpy(syscall32_page, syscall32_sysenter,
syscall32_sysenter_end - syscall32_sysenter);
diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h
index 6d324b8..a6d2ff5 100644
--- a/include/asm-x86_64/proto.h
+++ b/include/asm-x86_64/proto.h
@@ -81,7 +81,6 @@ extern void swap_low_mappings(void);
extern void __show_regs(struct pt_regs * regs);
extern void show_regs(struct pt_regs * regs);

-extern char *syscall32_page;
extern void syscall32_cpu_init(void);

extern void setup_node_bootmem(int nodeid, unsigned long start, unsigned long end);

2007-01-14 05:38:21

by Roland McGrath

Subject: [PATCH 11/11] powerpc vDSO: use install_special_mapping


This patch uses install_special_mapping for the powerpc vDSO setup,
consolidating duplicated code.

Signed-off-by: Roland McGrath <[email protected]>
---
arch/powerpc/kernel/vdso.c | 104 +++++++++++--------------------------------
1 files changed, 27 insertions(+), 77 deletions(-)

diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index ae0ede1..50149ec 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -49,9 +49,13 @@
/* Max supported size for symbol names */
#define MAX_SYMNAME 64

+#define VDSO32_MAXPAGES (((0x3000 + PAGE_MASK) >> PAGE_SHIFT) + 2)
+#define VDSO64_MAXPAGES (((0x3000 + PAGE_MASK) >> PAGE_SHIFT) + 2)
+
extern char vdso32_start, vdso32_end;
static void *vdso32_kbase = &vdso32_start;
unsigned int vdso32_pages;
+static struct page *vdso32_pagelist[VDSO32_MAXPAGES];
unsigned long vdso32_sigtramp;
unsigned long vdso32_rt_sigtramp;

@@ -59,6 +63,7 @@ unsigned long vdso32_rt_sigtramp;
extern char vdso64_start, vdso64_end;
static void *vdso64_kbase = &vdso64_start;
unsigned int vdso64_pages;
+static struct page *vdso64_pagelist[VDSO64_MAXPAGES];
unsigned long vdso64_rt_sigtramp;
#endif /* CONFIG_PPC64 */

@@ -165,55 +170,6 @@ static void dump_vdso_pages(struct vm_ar
#endif /* DEBUG */

/*
- * Keep a dummy vma_close for now, it will prevent VMA merging.
- */
-static void vdso_vma_close(struct vm_area_struct * vma)
-{
-}
-
-/*
- * Our nopage() function, maps in the actual vDSO kernel pages, they will
- * be mapped read-only by do_no_page(), and eventually COW'ed, either
- * right away for an initial write access, or by do_wp_page().
- */
-static struct page * vdso_vma_nopage(struct vm_area_struct * vma,
- unsigned long address, int *type)
-{
- unsigned long offset = address - vma->vm_start;
- struct page *pg;
-#ifdef CONFIG_PPC64
- void *vbase = (vma->vm_mm->task_size > TASK_SIZE_USER32) ?
- vdso64_kbase : vdso32_kbase;
-#else
- void *vbase = vdso32_kbase;
-#endif
-
- DBG("vdso_vma_nopage(current: %s, address: %016lx, off: %lx)\n",
- current->comm, address, offset);
-
- if (address < vma->vm_start || address > vma->vm_end)
- return NOPAGE_SIGBUS;
-
- /*
- * Last page is systemcfg.
- */
- if ((vma->vm_end - address) <= PAGE_SIZE)
- pg = virt_to_page(vdso_data);
- else
- pg = virt_to_page(vbase + offset);
-
- get_page(pg);
- DBG(" ->page count: %d\n", page_count(pg));
-
- return pg;
-}
-
-static struct vm_operations_struct vdso_vmops = {
- .close = vdso_vma_close,
- .nopage = vdso_vma_nopage,
-};
-
-/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
*/
@@ -221,20 +177,23 @@ int arch_setup_additional_pages(struct l
int executable_stack)
{
struct mm_struct *mm = current->mm;
- struct vm_area_struct *vma;
+ struct page **vdso_pagelist;
unsigned long vdso_pages;
unsigned long vdso_base;
int rc;

#ifdef CONFIG_PPC64
if (test_thread_flag(TIF_32BIT)) {
+ vdso_pagelist = vdso32_pagelist;
vdso_pages = vdso32_pages;
vdso_base = VDSO32_MBASE;
} else {
+ vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
vdso_base = VDSO64_MBASE;
}
#else
+ vdso_pagelist = vdso32_pagelist;
vdso_pages = vdso32_pages;
vdso_base = VDSO32_MBASE;
#endif
@@ -262,17 +221,6 @@ int arch_setup_additional_pages(struct l
goto fail_mmapsem;
}

-
- /* Allocate a VMA structure and fill it up */
- vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
- if (vma == NULL) {
- rc = -ENOMEM;
- goto fail_mmapsem;
- }
- vma->vm_mm = mm;
- vma->vm_start = vdso_base;
- vma->vm_end = vma->vm_start + (vdso_pages << PAGE_SHIFT);
-
/*
* our vma flags don't have VM_WRITE so by default, the process isn't
* allowed to write those pages.
@@ -282,32 +230,26 @@ int arch_setup_additional_pages(struct l
* and your nice userland gettimeofday will be totally dead.
* It's fine to use that for setting breakpoints in the vDSO code
* pages though
- */
- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC;
- /*
+ *
* Make sure the vDSO gets into every core dump.
* Dumping its contents makes post-mortem fully interpretable later
* without matching up the same kernel and hardware config to see
* what PC values meant.
*/
- vma->vm_flags |= VM_ALWAYSDUMP;
- vma->vm_flags |= mm->def_flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 0x7];
- vma->vm_ops = &vdso_vmops;
-
- /* Insert new VMA */
- rc = insert_vm_struct(mm, vma);
+ rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ VM_READ|VM_EXEC|
+ VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC|
+ VM_ALWAYSDUMP,
+ vdso_pagelist);
if (rc)
- goto fail_vma;
+ goto fail_mmapsem;

- /* Put vDSO base into mm struct and account for memory usage */
+ /* Put vDSO base into mm struct */
current->mm->context.vdso_base = vdso_base;
- mm->total_vm += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+
up_write(&mm->mmap_sem);
return 0;

- fail_vma:
- kmem_cache_free(vm_area_cachep, vma);
fail_mmapsem:
up_write(&mm->mmap_sem);
return rc;
@@ -778,18 +720,26 @@ void __init vdso_init(void)
}

/* Make sure pages are in the correct state */
+ BUG_ON(vdso32_pages + 2 > VDSO32_MAXPAGES);
for (i = 0; i < vdso32_pages; i++) {
struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
ClearPageReserved(pg);
get_page(pg);
-
+ vdso32_pagelist[i] = pg;
}
+ vdso32_pagelist[i++] = virt_to_page(vdso_data);
+ vdso32_pagelist[i] = NULL;
+
#ifdef CONFIG_PPC64
+ BUG_ON(vdso64_pages + 2 > VDSO64_MAXPAGES);
for (i = 0; i < vdso64_pages; i++) {
struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
ClearPageReserved(pg);
get_page(pg);
+ vdso64_pagelist[i] = pg;
}
+ vdso64_pagelist[i++] = virt_to_page(vdso_data);
+ vdso64_pagelist[i] = NULL;
#endif /* CONFIG_PPC64 */

get_page(virt_to_page(vdso_data));
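
The "+ 2" headroom in the VDSO*_MAXPAGES bounds is consumed by exactly the
two slots appended after the image pages above; condensed, each pagelist
ends up holding (a sketch mirroring vdso_init):

	unsigned int i;
	for (i = 0; i < vdso32_pages; i++)		/* the vDSO image itself */
		vdso32_pagelist[i] = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
	vdso32_pagelist[i++] = virt_to_page(vdso_data);	/* +1: the data page */
	vdso32_pagelist[i] = NULL;			/* +2: terminator for
							 * special_mapping_nopage() */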

2007-01-15 11:58:38

by Al Boldi

Subject: Re: [PATCH 1/11] Fix CONFIG_COMPAT_VDSO

Roland McGrath wrote:
>
> I wouldn't mind if CONFIG_COMPAT_VDSO went away entirely.
> But if it's there, it should work properly. Currently
> it's quite haphazard: both the real vma and the fixmap are
> mapped, both are put in the two different AT_* slots,
> sysenter returns to the vma address rather than the
> fixmap address, and core dumps are yet another story.
>
> This patch makes CONFIG_COMPAT_VDSO disable the real vma
> and use the fixmap area consistently. This makes it
> actually compatible with what the old vdso implementation did.

I just tried your patch, but your changes seem to revert performance
improvements achieved with 2.6.19, when vdso_enabled=1 and
randomize_va_space=0.


Thanks!

--
Al

2007-01-17 08:51:15

by Ingo Molnar

Subject: Re: [PATCH 1/11] Fix CONFIG_COMPAT_VDSO


* Roland McGrath <[email protected]> wrote:

> I wouldn't mind if CONFIG_COMPAT_VDSO went away entirely. But if it's
> there, it should work properly. Currently it's quite haphazard: both the
> real vma and the fixmap are mapped, both are put in the two different AT_*
> slots, sysenter returns to the vma address rather than the fixmap
> address, and core dumps are yet another story.

i think your patches #1...#7 are must-haves for v2.6.20, while #8-#11
could be delayed to v2.6.21?

Ingo

2007-01-17 09:03:45

by Roland McGrath

Subject: Re: [PATCH 1/11] Fix CONFIG_COMPAT_VDSO

> i think your patches #1...#7 are must-haves for v2.6.20, while #8-#11
> could be delayed to v2.6.21?

Indeed, 1-7 are fixes, while 8-11 are only cleanups that don't change behavior.


Thanks,
Roland

2007-01-23 19:48:41

by Andrew Morton

Subject: Re: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP

On Sat, 13 Jan 2007 21:34:28 -0800 (PST)
Roland McGrath <[email protected]> wrote:

> + vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {

Painful. Can we do this?


diff -puN fs/binfmt_elf.c~i386-vdso-use-vm_alwaysdump-tidy fs/binfmt_elf.c
--- a/fs/binfmt_elf.c~i386-vdso-use-vm_alwaysdump-tidy
+++ a/fs/binfmt_elf.c
@@ -1429,6 +1429,23 @@ static int elf_dump_thread_status(long s
}

/*
+ * Helper function for iterating across a vma list. It ensures that the caller
+ * will visit `gate_vma' prior to terminating the search.
+ */
+static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
+ struct vm_area_struct *gate_vma)
+{
+ struct vm_area_struct *ret;
+
+ ret = this_vma->vm_next;
+ if (ret)
+ return ret;
+ if (this_vma == gate_vma)
+ return NULL;
+ return gate_vma;
+}
+
+/*
* Actual dumper
*
* This is a two-pass process; first we find the offsets of the bits,
@@ -1600,8 +1617,7 @@ static int elf_core_dump(long signr, str
dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);

/* Write program headers for segments dump */
- for (vma = current->mm->mmap; vma != NULL;
- vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
+ for (vma = current->mm->mmap; vma; vma = next_vma(vma, gate_vma)) {
struct elf_phdr phdr;
size_t sz;

@@ -1650,8 +1666,7 @@ static int elf_core_dump(long signr, str
/* Align to page */
DUMP_SEEK(dataoff - foffset);

- for (vma = current->mm->mmap; vma != NULL;
- vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
+ for (vma = current->mm->mmap; vma; vma = next_vma(vma, gate_vma)) {
unsigned long addr;

if (!maydump(vma))
_

2007-01-23 19:57:22

by Linus Torvalds

Subject: Re: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP



On Tue, 23 Jan 2007, Andrew Morton wrote:
>
> /*
> + * Helper function for iterating across a vma list. It ensures that the caller
> + * will visit `gate_vma' prior to terminating the search.

Well, the comment is wrong. The code doesn't actually visit 'gate_vma' if
the list of VMAs is empty.

Not that the old code did either, so it's not like it's a new bug, but I
thought I'd point it out anyway. As if we care (but you can probably
trigger this by having an app that does

munmap(NULL, TASK_SIZE);

which will cause a SIGSEGV on return (because the stack doesn't exist) and
then the core-dump should be empty.)

Not that I tested anything that evil anyway, nor do I think we really care
if it means that the gate_vma doesn't get shown in the core-dump either.

Linus

2007-01-23 19:58:13

by Roland McGrath

Subject: Re: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP

> On Sat, 13 Jan 2007 21:34:28 -0800 (PST)
> Roland McGrath <[email protected]> wrote:
>
> > + vma = vma->vm_next ?: vma == gate_vma ? NULL : gate_vma) {
>
> Painful. Can we do this?

Can't stand concise, eh? ;-) Your version is fine with me.


Thanks,
Roland

2007-01-23 20:11:42

by Roland McGrath

Subject: Re: [PATCH 4/11] i386 vDSO: use VM_ALWAYSDUMP

> Not that the old code did either, so it's not like it's a new bug, but I
> thought I'd point it out anyway. As if we care (but you can probably
> trigger this by having an app that does
>
> munmap(NULL, TASK_SIZE);
>
> which will cause a SIGSEGV on return (because the stack doesn't exist) and
> then the core-dump should be empty.

Ok, damn you. I admit I thought of exactly this case and then glossed it
over because I didn't want to complicate the patch and discuss the arcane
justification. And I was doing a good job of repressing the memory of it
completely before you dredged it up.

> Not that I tested anythign that evil anyway, nor do I think we really care
> if it means that the gate_vma doesn't get shown in the core-dump either.

I didn't test it either. And I do sort of think the vDSO should be (the
only thing) in that case's core dump on anal principle. Now that you've
brought it up, I feel all dirty for ever having sent in code that doesn't
cover the case. (Not that we really care, since CONFIG_COMPAT_VDSO won't
actually be turned on in practice.)

Starting the loops with "vma = current->mm->mmap ?: gate_vma" should do it.
But I guess Andrew would prefer it written out:

static struct vm_area_struct *first_vma(struct task_struct *tsk,
					struct vm_area_struct *gate_vma)
{
	struct vm_area_struct *ret = tsk->mm->mmap;
	if (ret)
		return ret;
	return gate_vma;
}

and:

for (vma = first_vma(current, gate_vma); vma; vma = next_vma(vma, gate_vma)) {


Thanks,
Roland

2007-01-24 10:27:00

by Paul Mundt

Subject: Re: [PATCH 1/11] Fix CONFIG_COMPAT_VDSO

On Wed, Jan 17, 2007 at 01:03:34AM -0800, Roland McGrath wrote:
> > i think your patches #1...#7 are must-haves for v2.6.20, while #8-#11
> > could be delayed to v2.6.21?
>
> Indeed 1-7 are fixes while 8-11 are only cleanups not changing behavior.
>
Here's an update for the SH bits, for when parts 8-11 are ready.

Signed-off-by: Paul Mundt <[email protected]>

diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index deb4694..7b0f66f 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -37,11 +37,12 @@ __setup("vdso=", vdso_setup);
* of the ELF DSO images included therein.
*/
extern const char vsyscall_trapa_start, vsyscall_trapa_end;
-static void *syscall_page;
+static struct page *syscall_pages[1];

int __init vsyscall_init(void)
{
- syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+ void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC);
+ syscall_pages[0] = virt_to_page(syscall_page);

/*
* XXX: Map this page to a fixmap entry if we get around
@@ -55,37 +56,10 @@ int __init vsyscall_init(void)
return 0;
}

-static struct page *syscall_vma_nopage(struct vm_area_struct *vma,
- unsigned long address, int *type)
-{
- unsigned long offset = address - vma->vm_start;
- struct page *page;
-
- if (address < vma->vm_start || address > vma->vm_end)
- return NOPAGE_SIGBUS;
-
- page = virt_to_page(syscall_page + offset);
-
- get_page(page);
-
- return page;
-}
-
-/* Prevent VMA merging */
-static void syscall_vma_close(struct vm_area_struct *vma)
-{
-}
-
-static struct vm_operations_struct syscall_vm_ops = {
- .nopage = syscall_vma_nopage,
- .close = syscall_vma_close,
-};
-
/* Setup a VMA at program startup for the vsyscall page */
int arch_setup_additional_pages(struct linux_binprm *bprm,
int executable_stack)
{
- struct vm_area_struct *vma;
struct mm_struct *mm = current->mm;
unsigned long addr;
int ret;
@@ -97,30 +71,16 @@ int arch_setup_additional_pages(struct l
goto up_fail;
}

- vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
- if (!vma) {
- ret = -ENOMEM;
+ ret = install_special_mapping(mm, addr, PAGE_SIZE,
+ VM_READ | VM_EXEC |
+ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC |
+ VM_ALWAYSDUMP,
+ syscall_pages);
+ if (unlikely(ret))
goto up_fail;
- }
-
- vma->vm_start = addr;
- vma->vm_end = addr + PAGE_SIZE;
- /* MAYWRITE to allow gdb to COW and set breakpoints */
- vma->vm_flags = VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC|VM_MAYWRITE;
- vma->vm_flags |= mm->def_flags;
- vma->vm_page_prot = protection_map[vma->vm_flags & 7];
- vma->vm_ops = &syscall_vm_ops;
- vma->vm_mm = mm;
-
- ret = insert_vm_struct(mm, vma);
- if (unlikely(ret)) {
- kmem_cache_free(vm_area_cachep, vma);
- goto up_fail;
- }

current->mm->context.vdso = (void *)addr;

- mm->total_vm++;
up_fail:
up_write(&mm->mmap_sem);
return ret;