2003-05-02 00:21:37

by Roland McGrath

[permalink] [raw]
Subject: [PATCH] i386 vsyscall DSO implementation, take 2

There was a small bug in the core dump changes in the patch I posted.
I have fixed that. The rest of the patch is unchanged. AFAICT, people
like the idea, and this patch works well for me. Can it go in?


Thanks,
Roland


--- stock-2.5.68/arch/i386/kernel/Makefile Sat Apr 19 19:48:53 2003
+++ linux-2.5.68/arch/i386/kernel/Makefile Wed Apr 23 21:03:25 2003
@@ -27,9 +27,29 @@ obj-$(CONFIG_SOFTWARE_SUSPEND) += suspen
obj-$(CONFIG_X86_NUMAQ) += numaq.o
obj-$(CONFIG_EDD) += edd.o
obj-$(CONFIG_MODULES) += module.o
-obj-y += sysenter.o
+obj-y += sysenter.o vsyscall.o
obj-$(CONFIG_ACPI_SRAT) += srat.o

EXTRA_AFLAGS := -traditional

obj-$(CONFIG_SCx200) += scx200.o
+
+# vsyscall.o contains the vsyscall DSO images as __initdata.
+# We must build both images before we can assemble it.
+$(obj)/vsyscall.o: $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so
+extra-y += $(foreach F,int80 sysenter,vsyscall-$F.o vsyscall-$F.so)
+
+# The DSO images are built using a special linker script.
+$(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \
+$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o
+ $(CC) -nostdlib -shared -s -Wl,-soname=linux-vsyscall.so.1 \
+ -o $@ -Wl,-T,$^
+
+# We also create a special relocatable object that should mirror the symbol
+# table and layout of the linked DSO. With ld -R we can then refer to
+# these symbols in the kernel code rather than hand-coded addresses.
+extra-y += vsyscall-syms.o
+$(obj)/built-in.o: $(obj)/vsyscall-syms.o
+$(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o
+$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o
+ $(CC) -nostdlib -r -o $@ -Wl,-T,$^
--- stock-2.5.68/arch/i386/kernel/entry.S Sat Apr 19 19:48:56 2003
+++ linux-2.5.68/arch/i386/kernel/entry.S Wed Apr 23 20:37:55 2003
@@ -230,8 +230,8 @@ need_resched:
jmp need_resched
#endif

-/* Points to after the "sysenter" instruction in the vsyscall page */
-#define SYSENTER_RETURN 0xffffe010
+/* SYSENTER_RETURN points to after the "sysenter" instruction in
+ the vsyscall page. See vsyscall-sysenter.S, which defines the symbol. */

# sysenter call handler stub
ENTRY(sysenter_entry)
--- stock-2.5.68/arch/i386/kernel/signal.c Sat Apr 19 19:49:25 2003
+++ linux-2.5.68/arch/i386/kernel/signal.c Wed Apr 23 20:35:43 2003
@@ -19,6 +19,7 @@
#include <linux/stddef.h>
#include <linux/personality.h>
#include <linux/suspend.h>
+#include <linux/elf.h>
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
@@ -347,6 +348,10 @@ get_sigframe(struct k_sigaction *ka, str
return (void __user *)((esp - frame_size) & -8ul);
}

+/* These symbols are defined with the addresses in the vsyscall page.
+ See vsyscall-sigreturn.S. */
+extern void __kernel_sigreturn, __kernel_rt_sigreturn;
+
static void setup_frame(int sig, struct k_sigaction *ka,
sigset_t *set, struct pt_regs * regs)
{
@@ -379,7 +384,7 @@ static void setup_frame(int sig, struct
if (err)
goto give_sigsegv;

- restorer = (void *) (fix_to_virt(FIX_VSYSCALL) + 32);
+ restorer = &__kernel_sigreturn;
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;

@@ -462,7 +467,7 @@ static void setup_rt_frame(int sig, stru
goto give_sigsegv;

/* Set up to return from userspace. */
- restorer = (void *) (fix_to_virt(FIX_VSYSCALL) + 64);
+ restorer = &__kernel_rt_sigreturn;
if (ka->sa.sa_flags & SA_RESTORER)
restorer = ka->sa.sa_restorer;
err |= __put_user(restorer, &frame->pretcode);
--- stock-2.5.68/arch/i386/kernel/sysenter.c Sat Apr 19 19:51:16 2003
+++ linux-2.5.68/arch/i386/kernel/sysenter.c Wed Apr 23 02:16:02 2003
@@ -51,151 +51,30 @@ void enable_sep_cpu(void *info)
put_cpu();
}

+/*
+ * These symbols are defined by vsyscall.o to mark the bounds
+ * of the ELF DSO images included therein.
+ */
+extern const char vsyscall_int80_start, vsyscall_int80_end;
+extern const char vsyscall_sysenter_start, vsyscall_sysenter_end;
+
static int __init sysenter_setup(void)
{
- static const char __initdata int80[] = {
- 0xcd, 0x80, /* int $0x80 */
- 0xc3 /* ret */
- };
- /* Unwind information for the int80 code. Keep track of
- where the return address is stored. */
- static const char __initdata int80_eh_frame[] = {
- /* First the Common Information Entry (CIE): */
- 0x14, 0x00, 0x00, 0x00, /* Length of the CIE */
- 0x00, 0x00, 0x00, 0x00, /* CIE Identifier Tag */
- 0x01, /* CIE Version */
- 'z', 'R', 0x00, /* CIE Augmentation */
- 0x01, /* CIE Code Alignment Factor */
- 0x7c, /* CIE Data Alignment Factor */
- 0x08, /* CIE RA Column */
- 0x01, /* Augmentation size */
- 0x1b, /* FDE Encoding (pcrel sdata4) */
- 0x0c, /* DW_CFA_def_cfa */
- 0x04,
- 0x04,
- 0x88, /* DW_CFA_offset, column 0x8 */
- 0x01,
- 0x00, /* padding */
- 0x00,
- /* Now the FDE which contains the instructions for the frame. */
- 0x0a, 0x00, 0x00, 0x00, /* FDE Length */
- 0x1c, 0x00, 0x00, 0x00, /* FDE CIE offset */
- /* The PC-relative offset to the beginning of the code this
- FDE covers. The computation below assumes that the offset
- can be represented in one byte. Change if this is not true
- anymore. The offset from the beginning of the .eh_frame
- is represented by EH_FRAME_OFFSET. The word with the offset
- starts at byte 0x20 of the .eh_frame. */
- 0x100 - (EH_FRAME_OFFSET + 0x20),
- 0xff, 0xff, 0xff, /* FDE initial location */
- 3, /* FDE address range */
- 0x00 /* Augmentation size */
- /* The code does not change the stack pointer. We need not
- record any operations. */
- };
- static const char __initdata sysent[] = {
- 0x51, /* push %ecx */
- 0x52, /* push %edx */
- 0x55, /* push %ebp */
- /* 3: backjump target */
- 0x89, 0xe5, /* movl %esp,%ebp */
- 0x0f, 0x34, /* sysenter */
-
- /* 7: align return point with nop's to make disassembly easier */
- 0x90, 0x90, 0x90, 0x90,
- 0x90, 0x90, 0x90,
-
- /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
- 0xeb, 0xf3, /* jmp to "movl %esp,%ebp" */
- /* 16: System call normal return point is here! (SYSENTER_RETURN in entry.S) */
- 0x5d, /* pop %ebp */
- 0x5a, /* pop %edx */
- 0x59, /* pop %ecx */
- 0xc3 /* ret */
- };
- /* Unwind information for the sysenter code. Keep track of
- where the return address is stored. */
- static const char __initdata sysent_eh_frame[] = {
- /* First the Common Information Entry (CIE): */
- 0x14, 0x00, 0x00, 0x00, /* Length of the CIE */
- 0x00, 0x00, 0x00, 0x00, /* CIE Identifier Tag */
- 0x01, /* CIE Version */
- 'z', 'R', 0x00, /* CIE Augmentation */
- 0x01, /* CIE Code Alignment Factor */
- 0x7c, /* CIE Data Alignment Factor */
- 0x08, /* CIE RA Column */
- 0x01, /* Augmentation size */
- 0x1b, /* FDE Encoding (pcrel sdata4) */
- 0x0c, /* DW_CFA_def_cfa */
- 0x04,
- 0x04,
- 0x88, /* DW_CFA_offset, column 0x8 */
- 0x01,
- 0x00, /* padding */
- 0x00,
- /* Now the FDE which contains the instructions for the frame. */
- 0x22, 0x00, 0x00, 0x00, /* FDE Length */
- 0x1c, 0x00, 0x00, 0x00, /* FDE CIE offset */
- /* The PC-relative offset to the beginning of the code this
- FDE covers. The computation below assumes that the offset
- can be represented in one byte. Change if this is not true
- anymore. The offset from the beginning of the .eh_frame
- is represented by EH_FRAME_OFFSET. The word with the offset
- starts at byte 0x20 of the .eh_frame. */
- 0x100 - (EH_FRAME_OFFSET + 0x20),
- 0xff, 0xff, 0xff, /* FDE initial location */
- 0x14, 0x00, 0x00, 0x00, /* FDE address range */
- 0x00, /* Augmentation size */
- /* What follows are the instructions for the table generation.
- We have to record all changes of the stack pointer and
- callee-saved registers. */
- 0x41, /* DW_CFA_advance_loc+1, push %ecx */
- 0x0e, /* DW_CFA_def_cfa_offset */
- 0x08, /* RA at offset 8 now */
- 0x41, /* DW_CFA_advance_loc+1, push %edx */
- 0x0e, /* DW_CFA_def_cfa_offset */
- 0x0c, /* RA at offset 12 now */
- 0x41, /* DW_CFA_advance_loc+1, push %ebp */
- 0x0e, /* DW_CFA_def_cfa_offset */
- 0x10, /* RA at offset 16 now */
- 0x85, 0x04, /* DW_CFA_offset %ebp -16 */
- /* Finally the epilogue. */
- 0x4e, /* DW_CFA_advance_loc+14, pop %ebx */
- 0x0e, /* DW_CFA_def_cfa_offset */
- 0x12, /* RA at offset 12 now */
- 0xc5, /* DW_CFA_restore %ebp */
- 0x41, /* DW_CFA_advance_loc+1, pop %edx */
- 0x0e, /* DW_CFA_def_cfa_offset */
- 0x08, /* RA at offset 8 now */
- 0x41, /* DW_CFA_advance_loc+1, pop %ecx */
- 0x0e, /* DW_CFA_def_cfa_offset */
- 0x04 /* RA at offset 4 now */
- };
- static const char __initdata sigreturn[] = {
- /* 32: sigreturn point */
- 0x58, /* popl %eax */
- 0xb8, __NR_sigreturn, 0, 0, 0, /* movl $__NR_sigreturn, %eax */
- 0xcd, 0x80, /* int $0x80 */
- };
- static const char __initdata rt_sigreturn[] = {
- /* 64: rt_sigreturn point */
- 0xb8, __NR_rt_sigreturn, 0, 0, 0, /* movl $__NR_rt_sigreturn, %eax */
- 0xcd, 0x80, /* int $0x80 */
- };
unsigned long page = get_zeroed_page(GFP_ATOMIC);

__set_fixmap(FIX_VSYSCALL, __pa(page), PAGE_READONLY);
- memcpy((void *) page, int80, sizeof(int80));
- memcpy((void *)(page + 32), sigreturn, sizeof(sigreturn));
- memcpy((void *)(page + 64), rt_sigreturn, sizeof(rt_sigreturn));
- memcpy((void *)(page + EH_FRAME_OFFSET), int80_eh_frame,
- sizeof(int80_eh_frame));
- if (!boot_cpu_has(X86_FEATURE_SEP))
+
+ if (!boot_cpu_has(X86_FEATURE_SEP)) {
+ memcpy((void *) page,
+ &vsyscall_int80_start,
+ &vsyscall_int80_end - &vsyscall_int80_start);
return 0;
+ }
+
+ memcpy((void *) page,
+ &vsyscall_sysenter_start,
+ &vsyscall_sysenter_end - &vsyscall_sysenter_start);

- memcpy((void *) page, sysent, sizeof(sysent));
- memcpy((void *)(page + EH_FRAME_OFFSET), sysent_eh_frame,
- sizeof(sysent_eh_frame));
on_each_cpu(enable_sep_cpu, NULL, 1, 1);
return 0;
}
--- stock-2.5.68/arch/i386/kernel/vsyscall-int80.S Wed Dec 31 16:00:00 1969
+++ linux-2.5.68/arch/i386/kernel/vsyscall-int80.S Wed Apr 23 20:41:41 2003
@@ -0,0 +1,48 @@
+/*
+ * Code for the vsyscall page. This version uses the old int $0x80 method.
+ */
+
+ .text
+ .globl __kernel_vsyscall
+ .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+ int $0x80
+ ret
+.LEND_vsyscall:
+ .size __kernel_vsyscall,.-.LSTART_vsyscall
+ .previous
+
+ .section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI:
+ .long .LENDCIEDLSI-.LSTARTCIEDLSI
+.LSTARTCIEDLSI:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 1 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 8 /* Return address register column */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+ .byte 0x0c /* DW_CFA_def_cfa */
+ .uleb128 4
+ .uleb128 4
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .uleb128 1
+ .align 4
+.LENDCIEDLSI:
+ .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
+.LSTARTFDEDLSI:
+ .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
+ .long .LSTART_vsyscall-. /* PC-relative start address */
+ .long .LEND_vsyscall-.LSTART_vsyscall
+ .uleb128 0
+ .align 4
+.LENDFDEDLSI:
+ .previous
+
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#include "vsyscall-sigreturn.S"
--- stock-2.5.68/arch/i386/kernel/vsyscall-sysenter.S Wed Dec 31 16:00:00 1969
+++ linux-2.5.68/arch/i386/kernel/vsyscall-sysenter.S Wed Apr 23 23:13:14 2003
@@ -0,0 +1,97 @@
+/*
+ * Code for the vsyscall page. This version uses the sysenter instruction.
+ */
+
+ .text
+ .globl __kernel_vsyscall
+ .type __kernel_vsyscall,@function
+__kernel_vsyscall:
+.LSTART_vsyscall:
+ push %ecx
+.Lpush_ecx:
+ push %edx
+.Lpush_edx:
+ push %ebp
+.Lenter_kernel:
+ movl %esp,%ebp
+ sysenter
+
+ /* 7: align return point with nop's to make disassembly easier */
+ .space 7,0x90
+
+ /* 14: System call restart point is here! (SYSENTER_RETURN - 2) */
+ jmp .Lenter_kernel
+ /* 16: System call normal return point is here! */
+ .globl SYSENTER_RETURN /* Symbol used by entry.S. */
+SYSENTER_RETURN:
+ pop %ebp
+.Lpop_ebp:
+ pop %edx
+.Lpop_edx:
+ pop %ecx
+.Lpop_ecx:
+ ret
+.LEND_vsyscall:
+ .size __kernel_vsyscall,.-.LSTART_vsyscall
+ .previous
+
+ .section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI:
+ .long .LENDCIEDLSI-.LSTARTCIEDLSI
+.LSTARTCIEDLSI:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 1 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 8 /* Return address register column */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+ .byte 0x0c /* DW_CFA_def_cfa */
+ .uleb128 4
+ .uleb128 4
+ .byte 0x88 /* DW_CFA_offset, column 0x8 */
+ .uleb128 1
+ .align 4
+.LENDCIEDLSI:
+ .long .LENDFDEDLSI-.LSTARTFDEDLSI /* Length FDE */
+.LSTARTFDEDLSI:
+ .long .LSTARTFDEDLSI-.LSTARTFRAMEDLSI /* CIE pointer */
+ .long .LSTART_vsyscall-. /* PC-relative start address */
+ .long .LEND_vsyscall-.LSTART_vsyscall
+ .uleb128 0
+ /* What follows are the instructions for the table generation.
+ We have to record all changes of the stack pointer. */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpush_ecx-.LSTART_vsyscall
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x08 /* RA at offset 8 now */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpush_edx-.Lpush_ecx
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x0c /* RA at offset 12 now */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lenter_kernel-.Lpush_edx
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x10 /* RA at offset 16 now */
+ /* Finally the epilogue. */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpop_ebp-.Lenter_kernel
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x12 /* RA at offset 12 now */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpop_edx-.Lpop_ebp
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x08 /* RA at offset 8 now */
+ .byte 0x04 /* DW_CFA_advance_loc4 */
+ .long .Lpop_ecx-.Lpop_edx
+ .byte 0x0e /* DW_CFA_def_cfa_offset */
+ .byte 0x04 /* RA at offset 4 now */
+ .align 4
+.LENDFDEDLSI:
+ .previous
+
+/*
+ * Get the common code for the sigreturn entry points.
+ */
+#include "vsyscall-sigreturn.S"
--- stock-2.5.68/arch/i386/kernel/vsyscall-sigreturn.S Wed Dec 31 16:00:00 1969
+++ linux-2.5.68/arch/i386/kernel/vsyscall-sigreturn.S Wed Apr 23 20:43:16 2003
@@ -0,0 +1,38 @@
+/*
+ * Common code for the sigreturn entry points on the vsyscall page.
+ * So far this code is the same for both int80 and sysenter versions.
+ * This file is #include'd by vsyscall-*.S to define them after the
+ * vsyscall entry point. The addresses we get for these entry points
+ * by doing ".balign 32" must match in both versions of the page.
+ */
+
+#include <asm/unistd.h>
+
+
+/* XXX
+ Should these be named "_sigtramp" or something?
+*/
+
+ .text
+ .balign 32
+ .globl __kernel_sigreturn
+ .type __kernel_sigreturn,@function
+__kernel_sigreturn:
+.LSTART_kernel_sigreturn:
+ popl %eax /* XXX does this mean it needs unwind info? */
+ movl $__NR_sigreturn, %eax
+ int $0x80
+.LEND_sigreturn:
+ .size __kernel_sigreturn,.-.LSTART_sigreturn
+
+ .text
+ .balign 32
+ .globl __kernel_rt_sigreturn
+ .type __kernel_rt_sigreturn,@function
+__kernel_rt_sigreturn:
+.LSTART_kernel_rt_sigreturn:
+ movl $__NR_rt_sigreturn, %eax
+ int $0x80
+.LEND_rt_sigreturn:
+ .size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+ .previous
--- stock-2.5.68/arch/i386/kernel/vsyscall.lds Wed Dec 31 16:00:00 1969
+++ linux-2.5.68/arch/i386/kernel/vsyscall.lds Wed Apr 23 20:59:12 2003
@@ -0,0 +1,67 @@
+/*
+ * Linker script for vsyscall DSO. The vsyscall page is an ELF shared
+ * object prelinked to its virtual address, and with only one read-only
+ * segment (that fits in one page). This script controls its layout.
+ */
+
+/* This must match <asm/fixmap.h>. */
+VSYSCALL_BASE = 0xffffe000;
+
+SECTIONS
+{
+ . = VSYSCALL_BASE + SIZEOF_HEADERS;
+
+ .hash : { *(.hash) } :text
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .gnu.version : { *(.gnu.version) }
+ .gnu.version_d : { *(.gnu.version_d) }
+ .gnu.version_r : { *(.gnu.version_r) }
+
+ /* This linker script is used both with -r and with -shared.
+ For the layouts to match, we need to skip more than enough
+ space for the dynamic symbol table et al. If this amount
+ is insufficient, ld -shared will barf. Just increase it here. */
+ . = VSYSCALL_BASE + 0x400;
+
+ .text : { *(.text) } :text =0x90909090
+
+ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr
+ .eh_frame : { KEEP (*(.eh_frame)) } :text
+ .dynamic : { *(.dynamic) } :text :dynamic
+ .useless : {
+ *(.got.plt) *(.got)
+ *(.data .data.* .gnu.linkonce.d.*)
+ *(.dynbss)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ } :text
+}
+
+/*
+ * We must supply the ELF program headers explicitly to get just one
+ * PT_LOAD segment, and set the flags explicitly to make segments read-only.
+ */
+PHDRS
+{
+ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */
+ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */
+ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */
+}
+
+/*
+ * This controls what symbols we export from the DSO.
+ */
+VERSION
+{
+ LINUX_2.5 {
+ global:
+ __kernel_vsyscall;
+ __kernel_sigreturn;
+ __kernel_rt_sigreturn;
+
+ local: *;
+ };
+}
+
+/* The ELF entry point can be used to set the AT_SYSINFO value. */
+ENTRY(__kernel_vsyscall);
--- stock-2.5.68/fs/binfmt_elf.c Sat Apr 19 19:49:23 2003
+++ linux-2.5.68/fs/binfmt_elf.c Wed Apr 23 12:54:07 2003
@@ -1260,6 +1260,9 @@ static int elf_core_dump(long signr, str
elf_core_copy_regs(&prstatus->pr_reg, regs);

segs = current->mm->map_count;
+#ifdef ELF_CORE_EXTRA_PHDRS
+ segs += ELF_CORE_EXTRA_PHDRS;
+#endif

/* Set up header */
fill_elf_header(elf, segs+1); /* including notes section */
@@ -1340,6 +1343,10 @@ static int elf_core_dump(long signr, str
DUMP_WRITE(&phdr, sizeof(phdr));
}

+#ifdef ELF_CORE_WRITE_EXTRA_PHDRS
+ ELF_CORE_WRITE_EXTRA_PHDRS;
+#endif
+
/* write out the notes section */
for (i = 0; i < numnote; i++)
if (!writenote(notes + i, file))
@@ -1385,6 +1392,10 @@ static int elf_core_dump(long signr, str
}
}

+#ifdef ELF_CORE_WRITE_EXTRA_DATA
+ ELF_CORE_WRITE_EXTRA_DATA;
+#endif
+
if ((off_t) file->f_pos != offset) {
/* Sanity check */
printk("elf_core_dump: file->f_pos (%ld) != offset (%ld)\n",
--- stock-2.5.68/include/linux/elf.h Sat Apr 19 19:48:52 2003
+++ linux-2.5.68/include/linux/elf.h Wed Apr 23 02:48:09 2003
@@ -29,8 +29,11 @@ typedef __s64 Elf64_Sxword;
#define PT_NOTE 4
#define PT_SHLIB 5
#define PT_PHDR 6
+#define PT_LOOS 0x60000000
+#define PT_HIOS 0x6fffffff
#define PT_LOPROC 0x70000000
#define PT_HIPROC 0x7fffffff
+#define PT_GNU_EH_FRAME 0x6474e550
#define PT_MIPS_REGINFO 0x70000000

/* Flags in the e_flags field of the header */
--- stock-2.5.68/include/asm-i386/elf.h Sat Apr 19 19:50:08 2003
+++ linux-2.5.68/include/asm-i386/elf.h Thu Apr 24 23:24:32 2003
@@ -101,7 +101,7 @@ typedef struct user_fxsr_struct elf_fpxr
* for more of them, start the x86-specific ones at 32.
*/
#define AT_SYSINFO 32
-#define AT_SYSINFO_EH_FRAME 33
+#define AT_SYSINFO_EHDR 33

#ifdef __KERNEL__
#define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX)
@@ -119,15 +119,56 @@ extern void dump_smp_unlazy_fpu(void);
#define ELF_CORE_SYNC dump_smp_unlazy_fpu
#endif

-/* Offset from the beginning of the page where the .eh_frame information
- for the code in the vsyscall page starts. */
-#define EH_FRAME_OFFSET 96
+#define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL))
+#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE)
+#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall)
+extern void __kernel_vsyscall;

#define ARCH_DLINFO \
do { \
- NEW_AUX_ENT(AT_SYSINFO, 0xffffe000); \
- NEW_AUX_ENT(AT_SYSINFO_EH_FRAME, \
- 0xffffe000 + EH_FRAME_OFFSET); \
+ NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \
+ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \
+} while (0)
+
+/*
+ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
+ * extra segments containing the vsyscall DSO contents. Dumping its
+ * contents makes a post-mortem fully interpretable later without matching up
+ * the same kernel and hardware config to see what PC values meant.
+ * Dumping its extra ELF program headers includes all the other information
+ * a debugger needs to easily find how the vsyscall DSO was being used.
+ */
+#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum)
+#define ELF_CORE_WRITE_EXTRA_PHDRS \
+do { \
+ const struct elf_phdr *const vsyscall_phdrs = \
+ (const struct elf_phdr *) (VSYSCALL_BASE \
+ + VSYSCALL_EHDR->e_phoff); \
+ int i; \
+ Elf32_Off ofs = 0; \
+ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
+ struct elf_phdr phdr = vsyscall_phdrs[i]; \
+ if (phdr.p_type == PT_LOAD) { \
+ ofs = phdr.p_offset = offset; \
+ offset += phdr.p_filesz; \
+ } \
+ else \
+ phdr.p_offset += ofs; \
+ phdr.p_paddr = 0; /* match other core phdrs */ \
+ DUMP_WRITE(&phdr, sizeof(phdr)); \
+ } \
+} while (0)
+#define ELF_CORE_WRITE_EXTRA_DATA \
+do { \
+ const struct elf_phdr *const vsyscall_phdrs = \
+ (const struct elf_phdr *) (VSYSCALL_BASE \
+ + VSYSCALL_EHDR->e_phoff); \
+ int i; \
+ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
+ if (vsyscall_phdrs[i].p_type == PT_LOAD) \
+ DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \
+ vsyscall_phdrs[i].p_filesz); \
+ } \
} while (0)

#endif


2003-05-03 20:39:35

by Richard Henderson

[permalink] [raw]
Subject: Re: [PATCH] i386 vsyscall DSO implementation, take 2

On Thu, May 01, 2003 at 05:33:30PM -0700, Roland McGrath wrote:
> + /* What follows are the instructions for the table generation.
> + We have to record all changes of the stack pointer. */
> + .byte 0x04 /* DW_CFA_advance_loc4 */
> + .long .Lpush_ecx-.LSTART_vsyscall
> + .byte 0x0e /* DW_CFA_def_cfa_offset */
> + .byte 0x08 /* RA at offset 8 now */
> + .byte 0x04 /* DW_CFA_advance_loc4 */
> + .long .Lpush_edx-.Lpush_ecx
> + .byte 0x0e /* DW_CFA_def_cfa_offset */
> + .byte 0x0c /* RA at offset 12 now */
> + .byte 0x04 /* DW_CFA_advance_loc4 */
> + .long .Lenter_kernel-.Lpush_edx
> + .byte 0x0e /* DW_CFA_def_cfa_offset */
> + .byte 0x10 /* RA at offset 16 now */
> + /* Finally the epilogue. */
> + .byte 0x04 /* DW_CFA_advance_loc4 */
> + .long .Lpop_ebp-.Lenter_kernel
> + .byte 0x0e /* DW_CFA_def_cfa_offset */
> + .byte 0x12 /* RA at offset 12 now */
> + .byte 0x04 /* DW_CFA_advance_loc4 */

You lost the save/restore notes for ebp.

> + .type __kernel_sigreturn,@function
> +__kernel_sigreturn:
> +.LSTART_kernel_sigreturn:
> + popl %eax /* XXX does this mean it needs unwind info? */

Well, yes, but not because of this per se. The unwind info
for sigreturn will be quite complex because it should expose
the state of the machine after the return. I.e. it would
replace the rather complex code in both gdb and libgcc that
fakes some knowledge of the signal stack frame.

I can try to write it for you if you like.


r~

2003-05-04 06:09:08

by Richard Henderson

[permalink] [raw]
Subject: [PATCH] fix vsyscall unwind information

Re-adds the %ebp save/restore correction I'd given Uli when the
unwind information went into sysenter.c, which somehow got lost
with Roland's reorg.

Also adds unwind info for the sigreturn entry points. This can
be used instead of special-case hacks currently in libgcc and
gdb, and by extension allows the kernel to change these entry
points without breaking userland.

Tested with Roland's patch to make the VDSO available in glibc,
plus a set of fixes to libgcc, since location expressions hadn't
ever been tested before in this context.

Linus, please pull from

bk://are.twiddle.net/unwind-2.5


r~



arch/i386/Makefile | 9 ++
arch/i386/kernel/asm-offsets.c | 31 +++++++++
arch/i386/kernel/sigframe.h | 21 ++++++
arch/i386/kernel/signal.c | 23 -------
arch/i386/kernel/vsyscall-sigreturn.S | 110 +++++++++++++++++++++++++++++++++-
arch/i386/kernel/vsyscall-sysenter.S | 2
6 files changed, 171 insertions(+), 25 deletions(-)

through these ChangeSets:

<[email protected]> (03/05/03 1.1209)
Fix unwind info for sysenter entry point.
Add unwind info for sigreturn entry points.

# This is a BitKeeper generated patch for the following project:
# Project Name: Linux kernel tree
# This patch format is intended for GNU patch command version 2.5 or higher.
# This patch includes the following deltas:
# ChangeSet 1.1208 -> 1.1209
# arch/i386/Makefile 1.49 -> 1.50
# arch/i386/kernel/vsyscall-sysenter.S 1.1 -> 1.2
# arch/i386/kernel/signal.c 1.30 -> 1.31
# arch/i386/kernel/vsyscall-sigreturn.S 1.1 -> 1.2
# (new) -> 1.1 arch/i386/kernel/sigframe.h
# (new) -> 1.1 arch/i386/kernel/asm-offsets.c
#
# The following is the BitKeeper ChangeSet Log
# --------------------------------------------
# 03/05/03 [email protected] 1.1209
# Fix unwind info for sysenter entry point.
# Add unwind info for sigreturn entry points.
# --------------------------------------------
#
diff -Nru a/arch/i386/Makefile b/arch/i386/Makefile
--- a/arch/i386/Makefile Sat May 3 23:03:13 2003
+++ b/arch/i386/Makefile Sat May 3 23:03:13 2003
@@ -114,6 +114,15 @@
install fdimage fdimage144 fdimage288: vmlinux
$(Q)$(MAKE) $(build)=$(boot) BOOTIMAGE=$(BOOTIMAGE) $@

+prepare: include/asm-$(ARCH)/asm_offsets.h
+CLEAN_FILES += include/asm-$(ARCH)/asm_offsets.h
+
+arch/$(ARCH)/kernel/asm-offsets.s: include/asm include/linux/version.h \
+ include/config/MARKER
+
+include/asm-$(ARCH)/asm_offsets.h: arch/$(ARCH)/kernel/asm-offsets.s
+ $(call filechk,gen-asm-offsets)
+
archclean:
$(Q)$(MAKE) $(clean)=arch/i386/boot

diff -Nru a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
--- /dev/null Wed Dec 31 16:00:00 1969
+++ b/arch/i386/kernel/asm-offsets.c Sat May 3 23:03:13 2003
@@ -0,0 +1,31 @@
+/*
+ * Generate definitions needed by assembly language modules.
+ * This code generates raw asm output which is post-processed
+ * to extract and format the required data.
+ */
+
+#include <linux/signal.h>
+#include <asm/ucontext.h>
+#include "sigframe.h"
+
+#define DEFINE(sym, val) \
+ asm volatile("\n->" #sym " %0 " #val : : "i" (val))
+
+#define BLANK() asm volatile("\n->" : : )
+
+void foo(void)
+{
+ DEFINE(SIGCONTEXT_eax, offsetof (struct sigcontext, eax));
+ DEFINE(SIGCONTEXT_ebx, offsetof (struct sigcontext, ebx));
+ DEFINE(SIGCONTEXT_ecx, offsetof (struct sigcontext, ecx));
+ DEFINE(SIGCONTEXT_edx, offsetof (struct sigcontext, edx));
+ DEFINE(SIGCONTEXT_esi, offsetof (struct sigcontext, esi));
+ DEFINE(SIGCONTEXT_edi, offsetof (struct sigcontext, edi));
+ DEFINE(SIGCONTEXT_ebp, offsetof (struct sigcontext, ebp));
+ DEFINE(SIGCONTEXT_esp, offsetof (struct sigcontext, esp));
+ DEFINE(SIGCONTEXT_eip, offsetof (struct sigcontext, eip));
+ BLANK();
+
+ DEFINE(RT_SIGFRAME_sigcontext,
+ offsetof (struct rt_sigframe, uc.uc_mcontext));
+}
diff -Nru a/arch/i386/kernel/sigframe.h b/arch/i386/kernel/sigframe.h
--- /dev/null Wed Dec 31 16:00:00 1969
+++ b/arch/i386/kernel/sigframe.h Sat May 3 23:03:13 2003
@@ -0,0 +1,21 @@
+struct sigframe
+{
+ char *pretcode;
+ int sig;
+ struct sigcontext sc;
+ struct _fpstate fpstate;
+ unsigned long extramask[_NSIG_WORDS-1];
+ char retcode[8];
+};
+
+struct rt_sigframe
+{
+ char *pretcode;
+ int sig;
+ struct siginfo *pinfo;
+ void *puc;
+ struct siginfo info;
+ struct ucontext uc;
+ struct _fpstate fpstate;
+ char retcode[8];
+};
diff -Nru a/arch/i386/kernel/signal.c b/arch/i386/kernel/signal.c
--- a/arch/i386/kernel/signal.c Sat May 3 23:03:13 2003
+++ b/arch/i386/kernel/signal.c Sat May 3 23:03:13 2003
@@ -23,6 +23,7 @@
#include <asm/ucontext.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
+#include "sigframe.h"

#define DEBUG_SIG 0

@@ -125,28 +126,6 @@
/*
* Do a signal return; undo the signal stack.
*/
-
-struct sigframe
-{
- char *pretcode;
- int sig;
- struct sigcontext sc;
- struct _fpstate fpstate;
- unsigned long extramask[_NSIG_WORDS-1];
- char retcode[8];
-};
-
-struct rt_sigframe
-{
- char *pretcode;
- int sig;
- struct siginfo *pinfo;
- void *puc;
- struct siginfo info;
- struct ucontext uc;
- struct _fpstate fpstate;
- char retcode[8];
-};

static int
restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax)
diff -Nru a/arch/i386/kernel/vsyscall-sigreturn.S b/arch/i386/kernel/vsyscall-sigreturn.S
--- a/arch/i386/kernel/vsyscall-sigreturn.S Sat May 3 23:03:13 2003
+++ b/arch/i386/kernel/vsyscall-sigreturn.S Sat May 3 23:03:13 2003
@@ -7,6 +7,7 @@
*/

#include <asm/unistd.h>
+#include <asm/asm_offsets.h>


/* XXX
@@ -18,21 +19,124 @@
.globl __kernel_sigreturn
.type __kernel_sigreturn,@function
__kernel_sigreturn:
-.LSTART_kernel_sigreturn:
+.LSTART_sigreturn:
popl %eax /* XXX does this mean it needs unwind info? */
movl $__NR_sigreturn, %eax
int $0x80
.LEND_sigreturn:
.size __kernel_sigreturn,.-.LSTART_sigreturn

- .text
.balign 32
.globl __kernel_rt_sigreturn
.type __kernel_rt_sigreturn,@function
__kernel_rt_sigreturn:
-.LSTART_kernel_rt_sigreturn:
+.LSTART_rt_sigreturn:
movl $__NR_rt_sigreturn, %eax
int $0x80
.LEND_rt_sigreturn:
.size __kernel_rt_sigreturn,.-.LSTART_rt_sigreturn
+ .previous
+
+ .section .eh_frame,"a",@progbits
+.LSTARTFRAMEDLSI1:
+ .long .LENDCIEDLSI1-.LSTARTCIEDLSI1
+.LSTARTCIEDLSI1:
+ .long 0 /* CIE ID */
+ .byte 1 /* Version number */
+ .string "zR" /* NUL-terminated augmentation string */
+ .uleb128 1 /* Code alignment factor */
+ .sleb128 -4 /* Data alignment factor */
+ .byte 8 /* Return address register column */
+ .uleb128 1 /* Augmentation value length */
+ .byte 0x1b /* DW_EH_PE_pcrel|DW_EH_PE_sdata4. */
+ .byte 0 /* DW_CFA_nop */
+ .align 4
+.LENDCIEDLSI1:
+ .long .LENDFDEDLSI1-.LSTARTFDEDLSI1 /* Length FDE */
+.LSTARTFDEDLSI1:
+ .long .LSTARTFDEDLSI1-.LSTARTFRAMEDLSI1 /* CIE pointer */
+ /* HACK: The dwarf2 unwind routines will subtract 1 from the
+ return address to get an address in the middle of the
+ presumed call instruction. Since we didn't get here via
+ a call, we need to include the nop before the real start
+ to make up for it. */
+ .long .LSTART_sigreturn-1-. /* PC-relative start address */
+ .long .LEND_sigreturn-.LSTART_sigreturn+1
+ .uleb128 0 /* Augmentation */
+ /* What follows are the instructions for the table generation.
+ We record the locations of each register saved. This is
+ complicated by the fact that the "CFA" is always assumed to
+ be the value of the stack pointer in the caller. This means
+ that we must define the CFA of this body of code to be the
+ saved value of the stack pointer in the sigcontext. Which
+ also means that there is no fixed relation to the other
+ saved registers, which means that we must use DW_CFA_expression
+ to compute their addresses. It also means that when we
+ adjust the stack with the popl, we have to do it all over again. */
+
+#define do_cfa_expr(offset) \
+ .byte 0x0f; /* DW_CFA_def_cfa_expression */ \
+ .uleb128 1f-0f; /* length */ \
+0: .byte 0x74; /* DW_OP_breg4 */ \
+ .sleb128 offset; /* offset */ \
+ .byte 0x06; /* DW_OP_deref */ \
+1:
+
+#define do_expr(regno, offset) \
+ .byte 0x10; /* DW_CFA_expression */ \
+ .uleb128 regno; /* regno */ \
+ .uleb128 1f-0f; /* length */ \
+0: .byte 0x74; /* DW_OP_breg4 */ \
+ .sleb128 offset; /* offset */ \
+1:
+
+ do_cfa_expr(SIGCONTEXT_esp+4)
+ do_expr(0, SIGCONTEXT_eax+4)
+ do_expr(1, SIGCONTEXT_ecx+4)
+ do_expr(2, SIGCONTEXT_edx+4)
+ do_expr(3, SIGCONTEXT_ebx+4)
+ do_expr(5, SIGCONTEXT_ebp+4)
+ do_expr(6, SIGCONTEXT_esi+4)
+ do_expr(7, SIGCONTEXT_edi+4)
+ do_expr(8, SIGCONTEXT_eip+4)
+
+ .byte 0x42 /* DW_CFA_advance_loc 2 -- nop; popl eax. */
+
+ do_cfa_expr(SIGCONTEXT_esp)
+ do_expr(0, SIGCONTEXT_eax)
+ do_expr(1, SIGCONTEXT_ecx)
+ do_expr(2, SIGCONTEXT_edx)
+ do_expr(3, SIGCONTEXT_ebx)
+ do_expr(5, SIGCONTEXT_ebp)
+ do_expr(6, SIGCONTEXT_esi)
+ do_expr(7, SIGCONTEXT_edi)
+ do_expr(8, SIGCONTEXT_eip)
+
+ .align 4
+.LENDFDEDLSI1:
+
+ .long .LENDFDEDLSI2-.LSTARTFDEDLSI2 /* Length FDE */
+.LSTARTFDEDLSI2:
+ .long .LSTARTFDEDLSI2-.LSTARTFRAMEDLSI1 /* CIE pointer */
+ /* HACK: See above wrt unwind library assumptions. */
+ .long .LSTART_rt_sigreturn-1-. /* PC-relative start address */
+ .long .LEND_rt_sigreturn-.LSTART_rt_sigreturn+1
+ .uleb128 0 /* Augmentation */
+ /* What follows are the instructions for the table generation.
+ We record the locations of each register saved. This is
+ slightly less complicated than the above, since we don't
+ modify the stack pointer in the process. */
+
+ do_cfa_expr(RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esp)
+ do_expr(0, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eax)
+ do_expr(1, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ecx)
+ do_expr(2, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edx)
+ do_expr(3, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebx)
+ do_expr(5, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_ebp)
+ do_expr(6, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_esi)
+ do_expr(7, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_edi)
+ do_expr(8, RT_SIGFRAME_sigcontext-4 + SIGCONTEXT_eip)
+
+ .align 4
+.LENDFDEDLSI2:
.previous
diff -Nru a/arch/i386/kernel/vsyscall-sysenter.S b/arch/i386/kernel/vsyscall-sysenter.S
--- a/arch/i386/kernel/vsyscall-sysenter.S Sat May 3 23:03:13 2003
+++ b/arch/i386/kernel/vsyscall-sysenter.S Sat May 3 23:03:13 2003
@@ -74,11 +74,13 @@
.long .Lenter_kernel-.Lpush_edx
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x10 /* RA at offset 16 now */
+ .byte 0x85, 0x04 /* DW_CFA_offset %ebp -16 */
/* Finally the epilogue. */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpop_ebp-.Lenter_kernel
.byte 0x0e /* DW_CFA_def_cfa_offset */
.byte 0x12 /* RA at offset 12 now */
+ .byte 0xc5 /* DW_CFA_restore %ebp */
.byte 0x04 /* DW_CFA_advance_loc4 */
.long .Lpop_edx-.Lpop_ebp
.byte 0x0e /* DW_CFA_def_cfa_offset */

2003-05-04 07:22:31

by Ulrich Drepper

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Richard Henderson wrote:

> Also adds unwind info for the sigreturn entry points.

Sweet.


> This can
> be used instead of special-case hacks currently in libgcc and
> gdb, and by extension allows the kernel to change these entry
> points without breaking userland.

Exactly. This means we can get rid of the int $0x80, i.e., move the
sigreturn code into the two .S files for sysenter and int80. Reducing the
cost of signals is always good.

I'm not sure what this use of sigreturn has to look like. Linus, maybe
you should do it yourself.


- --
- --------------. ,-. 444 Castro Street
Ulrich Drepper \ ,-----------------' \ Mountain View, CA 94041 USA
Red Hat `--' drepper at redhat.com `---------------------------
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.2.1 (GNU/Linux)

iD8DBQE+tMKL2ijCOnn/RHQRAoDbAJ4nnP3Hcb8xAeqjRHxgq0YZZmcs0wCdE4BE
Obuu/wRdq70aooscs1JOhto=
=5Sx4
-----END PGP SIGNATURE-----

2003-05-05 06:46:43

by David Mosberger-Tang

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

>>>>> On Sun, 04 May 2003 08:30:10 +0200, Richard Henderson <[email protected]> said:

Richard> Also adds unwind info for the sigreturn entry points. This
Richard> can be used instead of special-case hacks currently in
Richard> libgcc and gdb, and by extension allows the kernel to
Richard> change these entry points without breaking userland.

Is there a marker or some other way to identify the sigreturn as such?
If not, could one be added?

--david

2003-05-05 07:30:21

by Richard Henderson

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

On Sun, May 04, 2003 at 11:49:31PM -0700, David Mosberger-Tang wrote:
> Is there a marker or some other way to identify the sigreturn as such?

No.

> If not, could one be added?

Why? Certainly it isn't needed for x86.


r~

2003-05-05 14:57:20

by davidm

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

>>>>> On Mon, 5 May 2003 00:42:48 -0700, Richard Henderson <[email protected]> said:

>> If not, could one be added?

Richard> Why? Certainly it isn't needed for x86.

Certain applications (such as debuggers) want to know. Sure, you can
do symbol matching (if you have the symbol table) or code-reading
(assuming you know the exact sigreturn sequence), but having a marker
would be more reliable and faster.

--david

2003-05-05 16:53:21

by davidm

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

>>>>> On Mon, 5 May 2003 09:34:44 -0700, Richard Henderson <[email protected]> said:

Richard> On Mon, May 05, 2003 at 08:05:58AM -0700, David
Richard> Mosberger-Tang wrote:
>> Richard> Why? Certainly it isn't needed for x86.
>> Certain applications (such as debuggers) want to know. Sure,
>> you can do symbol matching (if you have the symbol table) or
>> code-reading (assuming you know the exact sigreturn sequence),
>> but having a marker would be more reliable and faster.

Richard> Eh. The whole point was to *eliminate* the special cases.

Signal handlers have special significance on UNIX-like operating
systems. An application might want to know when it's in a signal
frame. If it's a problem to do this with DWARF2 info, fine. If not,
please consider adding a marker (my current version of libunwind for
x86 does code-reading to detect signal-frames; not pretty, but it
works reasonably well in practice; it would be nicer to get rid of the
code-reading though, in the future).

--david

2003-05-05 16:43:50

by Richard Henderson

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

On Mon, May 05, 2003 at 08:05:58AM -0700, David Mosberger-Tang wrote:
> Richard> Why? Certainly it isn't needed for x86.
>
> Certain applications (such as debuggers) want to know. Sure, you can
> do symbol matching (if you have the symbol table) or code-reading
> (assuming you know the exact sigreturn sequence), but having a marker
> would be more reliable and faster.

Eh. The whole point was to *eliminate* the special cases.

If the debugger does nothing special now, it'll see the symbol
from the VDSO in the backtrace and print __kernel_sigreturn.
Isn't this sufficient for the user to recognize what's going on?
Does it really need to print <signal frame>?



r~

2003-05-05 22:58:20

by Mark Kettenis

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

Richard Henderson <[email protected]> writes:

> On Mon, May 05, 2003 at 08:05:58AM -0700, David Mosberger-Tang wrote:
> > Richard> Why? Certainly it isn't needed for x86.
> >
> > Certain applications (such as debuggers) want to know. Sure, you can
> > do symbol matching (if you have the symbol table) or code-reading
> > (assuming you know the exact sigreturn sequence), but having a marker
> > would be more reliable and faster.
>
> Eh. The whole point was to *eliminate* the special cases.
>
> If the debugger does nothing special now, it'll see the symbol
> from the VDSO in the backtrace and print __kernel_sigreturn.
> Isn't this sufficient for the user to recognize what's going on?
> Does it really need to print <signal frame>?

Unfortunately, GDB needs to be able to recognize signal trampolines in
order to be able to single step correctly when a signal arrives. At
least on some platforms. Could be that the code-path in question
isn't used for Linux/i386, but I vaguely remember that it is.

Anyway, signal trampolines could be marked with a special augmentation
in their CIE.

Mark

2003-05-06 00:08:33

by Richard Henderson

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

On Tue, May 06, 2003 at 01:10:37AM +0200, Mark Kettenis wrote:
> Unfortunately, GDB needs to be able to recognize signal trampolines in
> order to be able to single step correctly when a signal arrives.

If it actually used the dwarf2 unwind information as written,
I wouldn't expect this to be true.

> Anyway, signal trampolines could be marked with a special augmentation
> in their CIE.

I'd prefer not, if at all possible.


r~

2003-05-06 06:06:48

by davidm

[permalink] [raw]
Subject: Re: [PATCH] fix vsyscall unwind information

>>>>> On Mon, 5 May 2003 17:21:00 -0700, Richard Henderson <[email protected]> said:

>> Anyway, signal trampolines could be marked with a special
>> augmentation in their CIE.

Richard> I'd prefer not, if at all possible.

Why? I bet you'll live to regret it if you don't add it now. ;-)

--david